Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
8aa08ca2
提交
8aa08ca2
authored
2月 20, 2012
作者:
lamblin
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #450 from nouiz/gpusum
Test nvidia driver
上级
8b1c4916
07deef6b
隐藏空白字符变更
内嵌
并排
正在显示
15 个修改的文件
包含
669 行增加
和
424 行删除
+669
-424
__init__.py
theano/__init__.py
+5
-0
GpuConv3D.py
theano/sandbox/cuda/GpuConv3D.py
+2
-2
GpuConvGrad3D.py
theano/sandbox/cuda/GpuConvGrad3D.py
+3
-2
GpuConvTransp3D.py
theano/sandbox/cuda/GpuConvTransp3D.py
+3
-2
__init__.py
theano/sandbox/cuda/__init__.py
+67
-36
basic_ops.py
theano/sandbox/cuda/basic_ops.py
+29
-15
blas.py
theano/sandbox/cuda/blas.py
+10
-9
nnet.py
theano/sandbox/cuda/nnet.py
+5
-4
rng_curand.py
theano/sandbox/cuda/rng_curand.py
+2
-2
test_basic_ops.py
theano/sandbox/cuda/tests/test_basic_ops.py
+464
-346
test_driver.py
theano/sandbox/cuda/tests/test_driver.py
+64
-0
var.py
theano/sandbox/cuda/var.py
+6
-0
multinomial.py
theano/sandbox/multinomial.py
+2
-2
neighbours.py
theano/sandbox/neighbours.py
+2
-2
rng_mrg.py
theano/sandbox/rng_mrg.py
+5
-2
没有找到文件。
theano/__init__.py
浏览文件 @
8aa08ca2
...
@@ -99,6 +99,11 @@ import gof
...
@@ -99,6 +99,11 @@ import gof
if
config
.
device
.
startswith
(
'gpu'
)
or
config
.
init_gpu_device
.
startswith
(
'gpu'
):
if
config
.
device
.
startswith
(
'gpu'
)
or
config
.
init_gpu_device
.
startswith
(
'gpu'
):
import
theano.sandbox.cuda
import
theano.sandbox.cuda
# We can't test the driver during the import of theano.sandbox.cuda, as
# this causes a circular import dependency. So we also test it manually
# after the import.
import
theano.sandbox.cuda.tests.test_driver
theano
.
sandbox
.
cuda
.
tests
.
test_driver
.
test_nvidia_driver1
()
# Use config.numpy to call numpy.seterr
# Use config.numpy to call numpy.seterr
import
numpy
import
numpy
...
...
theano/sandbox/cuda/GpuConv3D.py
浏览文件 @
8aa08ca2
...
@@ -7,9 +7,9 @@ from theano.sandbox.cuda.basic_ops import as_cuda_ndarray_variable, host_from_gp
...
@@ -7,9 +7,9 @@ from theano.sandbox.cuda.basic_ops import as_cuda_ndarray_variable, host_from_gp
from
theano.misc
import
strutil
from
theano.misc
import
strutil
from
theano.tensor.nnet.Conv3D
import
Conv3D
from
theano.tensor.nnet.Conv3D
import
Conv3D
from
theano.sandbox.cuda.opt
import
register_opt
from
theano.sandbox.cuda.opt
import
register_opt
from
theano.sandbox.cuda
import
CudaNdarrayType
from
theano.sandbox.cuda
import
CudaNdarrayType
,
GpuOp
class
GpuConv3D
(
theano
.
Op
):
class
GpuConv3D
(
Gpu
Op
):
""" GPU implementation of Conv3D """
""" GPU implementation of Conv3D """
def
__eq__
(
self
,
other
):
def
__eq__
(
self
,
other
):
...
...
theano/sandbox/cuda/GpuConvGrad3D.py
浏览文件 @
8aa08ca2
...
@@ -8,11 +8,12 @@ from theano.misc import strutil
...
@@ -8,11 +8,12 @@ from theano.misc import strutil
from
theano.tensor.nnet.ConvGrad3D
import
ConvGrad3D
from
theano.tensor.nnet.ConvGrad3D
import
ConvGrad3D
from
theano.sandbox.cuda.opt
import
register_opt
from
theano.sandbox.cuda.opt
import
register_opt
from
theano.sandbox.cuda
import
CudaNdarrayType
,
HostFromGpu
,
host_from_gpu
from
theano.sandbox.cuda
import
(
CudaNdarrayType
,
HostFromGpu
,
host_from_gpu
,
GpuOp
)
class
GpuConvGrad3D
(
theano
.
Op
):
class
GpuConvGrad3D
(
Gpu
Op
):
""" GPU version of gradient of ConvGrad3D with respect to W """
""" GPU version of gradient of ConvGrad3D with respect to W """
def
make_node
(
self
,
V
,
d
,
WShape
,
dCdH
):
def
make_node
(
self
,
V
,
d
,
WShape
,
dCdH
):
...
...
theano/sandbox/cuda/GpuConvTransp3D.py
浏览文件 @
8aa08ca2
...
@@ -9,10 +9,11 @@ from theano.gof import local_optimizer
...
@@ -9,10 +9,11 @@ from theano.gof import local_optimizer
from
theano.sandbox.cuda.basic_ops
import
as_cuda_ndarray_variable
from
theano.sandbox.cuda.basic_ops
import
as_cuda_ndarray_variable
from
theano.sandbox.cuda.opt
import
register_opt
from
theano.sandbox.cuda.opt
import
register_opt
from
theano.sandbox.cuda
import
CudaNdarrayType
,
HostFromGpu
,
host_from_gpu
from
theano.sandbox.cuda
import
(
CudaNdarrayType
,
HostFromGpu
,
host_from_gpu
,
GpuOp
)
class
GpuConvTransp3D
(
theano
.
Op
):
class
GpuConvTransp3D
(
Gpu
Op
):
""" The gpu version of ConvTransp3D """
""" The gpu version of ConvTransp3D """
def
__eq__
(
self
,
other
):
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
return
type
(
self
)
==
type
(
other
)
...
...
theano/sandbox/cuda/__init__.py
浏览文件 @
8aa08ca2
import
atexit
,
logging
,
os
,
shutil
,
stat
,
sys
import
atexit
,
logging
,
os
,
shutil
,
stat
,
sys
import
numpy
import
theano
from
theano.compile
import
optdb
from
theano.compile
import
optdb
from
theano.gof.cmodule
import
get_lib_extension
from
theano.gof.cmodule
import
get_lib_extension
from
theano.configparser
import
config
,
AddConfigVar
,
StrParam
from
theano.configparser
import
config
,
AddConfigVar
,
StrParam
...
@@ -23,7 +27,8 @@ if config.cuda.root == "AUTO":
...
@@ -23,7 +27,8 @@ if config.cuda.root == "AUTO":
# set nvcc_path correctly and get the version
# set nvcc_path correctly and get the version
nvcc_compiler
.
set_cuda_root
()
nvcc_compiler
.
set_cuda_root
()
#is_nvcc_available called here to initialize global vars in nvcc_compiler module
#is_nvcc_available called here to initialize global vars in
#nvcc_compiler module
nvcc_compiler
.
is_nvcc_available
()
nvcc_compiler
.
is_nvcc_available
()
# Compile cuda_ndarray.cu
# Compile cuda_ndarray.cu
...
@@ -31,8 +36,9 @@ nvcc_compiler.is_nvcc_available()
...
@@ -31,8 +36,9 @@ nvcc_compiler.is_nvcc_available()
# printed and this module will not be working properly (we set `cuda_available`
# printed and this module will not be working properly (we set `cuda_available`
# to False).
# to False).
# This variable is True by default, and set to False if nvcc is not available or
# This variable is True by default, and set to False if nvcc is not
# there is no cuda card or something goes wrong when trying to initialize cuda.
# available or there is no cuda card or something goes wrong when
# trying to initialize cuda.
cuda_available
=
True
cuda_available
=
True
# Global variable to avoid displaying the same warning multiple times.
# Global variable to avoid displaying the same warning multiple times.
...
@@ -41,6 +47,7 @@ cuda_warning_is_displayed = False
...
@@ -41,6 +47,7 @@ cuda_warning_is_displayed = False
#This variable is set to True when we enable cuda.(i.e. when use() is called)
#This variable is set to True when we enable cuda.(i.e. when use() is called)
cuda_enabled
=
False
cuda_enabled
=
False
# Code factorized within a function so that it may be called from multiple
# Code factorized within a function so that it may be called from multiple
# places (which is not currently the case, but may be useful in the future).
# places (which is not currently the case, but may be useful in the future).
def
set_cuda_disabled
():
def
set_cuda_disabled
():
...
@@ -72,17 +79,18 @@ libcuda_ndarray_so = os.path.join(cuda_ndarray_loc,
...
@@ -72,17 +79,18 @@ libcuda_ndarray_so = os.path.join(cuda_ndarray_loc,
'libcuda_ndarray.'
+
get_lib_extension
())
'libcuda_ndarray.'
+
get_lib_extension
())
# Add the theano cache directory's cuda_ndarray subdirectory to the list of
# Add the theano cache directory's cuda_ndarray subdirectory to the
# places that are hard-coded into compiled modules' runtime library search
# list of places that are hard-coded into compiled modules' runtime
# list. This works in conjunction with nvcc_compiler.nvcc_module_compile_str
# library search list. This works in conjunction with
# which adds this folder during compilation with -L and also adds -lcuda_ndarray
# nvcc_compiler.nvcc_module_compile_str which adds this folder during
# when compiling modules.
# compilation with -L and also adds -lcuda_ndarray when compiling
# modules.
nvcc_compiler
.
add_standard_rpath
(
cuda_ndarray_loc
)
nvcc_compiler
.
add_standard_rpath
(
cuda_ndarray_loc
)
compile_cuda_ndarray
=
True
compile_cuda_ndarray
=
True
if
os
.
path
.
exists
(
cuda_ndarray_so
):
if
os
.
path
.
exists
(
cuda_ndarray_so
):
compile_cuda_ndarray
=
date
>=
os
.
stat
(
cuda_ndarray_so
)[
stat
.
ST_MTIME
]
compile_cuda_ndarray
=
date
>=
os
.
stat
(
cuda_ndarray_so
)[
stat
.
ST_MTIME
]
if
not
compile_cuda_ndarray
:
if
not
compile_cuda_ndarray
:
try
:
try
:
# If we load a previously-compiled version, config.compiledir should
# If we load a previously-compiled version, config.compiledir should
...
@@ -111,7 +119,7 @@ try:
...
@@ -111,7 +119,7 @@ try:
include_dirs
=
[
cuda_path
],
libs
=
[
'cublas'
])
include_dirs
=
[
cuda_path
],
libs
=
[
'cublas'
])
from
cuda_ndarray.cuda_ndarray
import
*
from
cuda_ndarray.cuda_ndarray
import
*
except
Exception
,
e
:
except
Exception
,
e
:
_logger
.
error
(
"Failed to compile cuda_ndarray.cu:
%
s"
,
str
(
e
))
_logger
.
error
(
"Failed to compile cuda_ndarray.cu:
%
s"
,
str
(
e
))
set_cuda_disabled
()
set_cuda_disabled
()
if
cuda_available
:
if
cuda_available
:
...
@@ -129,10 +137,13 @@ if cuda_available:
...
@@ -129,10 +137,13 @@ if cuda_available:
os
.
symlink
(
cuda_ndarray_so
,
libcuda_ndarray_so
)
os
.
symlink
(
cuda_ndarray_so
,
libcuda_ndarray_so
)
try
:
try
:
# This only tests whether the cuda driver is available and whether there
# is at least one GPU that supports cuda. This does not select a
# device.
gpu_init
()
gpu_init
()
cuda_available
=
True
cuda_available
=
True
cuda_initialization_error_message
=
""
cuda_initialization_error_message
=
""
# actively closing our gpu session presents segfault-on-exit on some systems
# actively closing our gpu session presents segfault-on-exit on some systems
atexit
.
register
(
gpu_shutdown
)
atexit
.
register
(
gpu_shutdown
)
except
EnvironmentError
,
e
:
except
EnvironmentError
,
e
:
cuda_available
=
False
cuda_available
=
False
...
@@ -162,7 +173,7 @@ if cuda_available:
...
@@ -162,7 +173,7 @@ if cuda_available:
shared_constructor
=
float32_shared_constructor
shared_constructor
=
float32_shared_constructor
import
basic_ops
import
basic_ops
from
basic_ops
import
(
GpuFromHost
,
HostFromGpu
,
GpuElemwise
,
from
basic_ops
import
(
Gpu
Op
,
Gpu
FromHost
,
HostFromGpu
,
GpuElemwise
,
GpuDimShuffle
,
GpuSum
,
GpuReshape
,
GpuContiguous
,
GpuDimShuffle
,
GpuSum
,
GpuReshape
,
GpuContiguous
,
GpuSubtensor
,
GpuIncSubtensor
,
GpuSubtensor
,
GpuIncSubtensor
,
GpuAdvancedSubtensor1
,
GpuAdvancedIncSubtensor1
,
GpuAdvancedSubtensor1
,
GpuAdvancedIncSubtensor1
,
...
@@ -180,18 +191,31 @@ def use(device,
...
@@ -180,18 +191,31 @@ def use(device,
force
=
False
,
force
=
False
,
default_to_move_computation_to_gpu
=
True
,
default_to_move_computation_to_gpu
=
True
,
move_shared_float32_to_gpu
=
True
,
move_shared_float32_to_gpu
=
True
,
enable_cuda
=
True
):
enable_cuda
=
True
,
test_driver
=
True
):
"""
"""
Error and warning about CUDA should be displayed only when this function is called.
Error and warning about CUDA should be displayed only when this
We need to be able to load this module only to check if it is available!
function is called. We need to be able to load this module only
to check if it is available!
:param device: string "cpu", "gpu", "gpuN" N is the device number to use
:param force: Will always raise an exception if we can't use the gpu.
:param default_to_move_computation_to_gpu: If gpu init succeeded, enable by
default optimization to move
computation to the gpu
:param move_shared_float32_to_gpu: If gpu init succeeded, put new shared
variable in float32 on the gpu.
:param enable_cuda: If the gpu is correctly enabled,
set the variable cuda_enabled to True.
"""
"""
global
cuda_enabled
,
cuda_initialization_error_message
global
cuda_enabled
,
cuda_initialization_error_message
if
force
and
not
cuda_available
and
device
.
startswith
(
'gpu'
):
if
force
and
not
cuda_available
and
device
.
startswith
(
'gpu'
):
if
not
nvcc_compiler
.
is_nvcc_available
():
if
not
nvcc_compiler
.
is_nvcc_available
():
raise
EnvironmentError
(
"You forced the use of gpu device '
%
s', but
"
raise
EnvironmentError
(
"You forced the use of gpu device '
%
s', but"
"nvcc was not found. Set it in your PATH "
"
nvcc was not found. Set it in your PATH "
"environment variable or set the Theano "
"environment variable or set the Theano "
"flags 'cuda.root' to its directory"
%
device
)
"flags 'cuda.root' to its directory"
""
%
device
)
else
:
else
:
raise
EnvironmentError
(
"You forced the use of gpu device
%
s, "
raise
EnvironmentError
(
"You forced the use of gpu device
%
s, "
"but CUDA initialization failed "
"but CUDA initialization failed "
...
@@ -206,7 +230,8 @@ def use(device,
...
@@ -206,7 +230,8 @@ def use(device,
try
:
try
:
if
cuda_initialization_error_message
:
if
cuda_initialization_error_message
:
error_addendum
=
" (error:
%
s)"
%
cuda_initialization_error_message
error_addendum
=
" (error:
%
s)"
%
cuda_initialization_error_message
except
NameError
:
# cuda_initialization_error_message is not available b/c compilation failed
except
NameError
:
# cuda_initialization_error_message is not available b/c compilation failed
pass
pass
_logger
.
warning
(
'CUDA is installed, but device
%
s is not available
%
s'
,
_logger
.
warning
(
'CUDA is installed, but device
%
s is not available
%
s'
,
device
,
error_addendum
)
device
,
error_addendum
)
...
@@ -222,29 +247,33 @@ def use(device,
...
@@ -222,29 +247,33 @@ def use(device,
raise
ValueError
(
"Invalid device identifier"
,
device
)
raise
ValueError
(
"Invalid device identifier"
,
device
)
if
use
.
device_number
is
None
:
if
use
.
device_number
is
None
:
# No successful call to use() has been made yet
# No successful call to use() has been made yet
if
device
!=
'gpu'
and
device
<
0
:
if
device
!=
'gpu'
and
device
<
0
:
return
return
if
device
in
[
None
,
""
]:
if
device
in
[
None
,
""
]:
device
=
0
device
=
0
try
:
try
:
if
device
!=
'gpu'
:
if
device
!=
'gpu'
:
gpu_init
(
device
)
gpu_init
(
device
)
use
.
device_number
=
device
if
test_driver
:
import
theano.sandbox.cuda.tests.test_driver
theano
.
sandbox
.
cuda
.
tests
.
test_driver
.
test_nvidia_driver1
()
if
move_shared_float32_to_gpu
:
if
move_shared_float32_to_gpu
:
handle_shared_float32
(
True
)
handle_shared_float32
(
True
)
use
.
device_number
=
device
if
enable_cuda
:
if
enable_cuda
:
cuda_enabled
=
True
cuda_enabled
=
True
print
>>
sys
.
stderr
,
"Using gpu device
%
d:
%
s"
%
(
active_device_number
(),
active_device_name
())
print
>>
sys
.
stderr
,
"Using gpu device
%
d:
%
s"
%
(
active_device_number
(),
active_device_name
())
except
(
EnvironmentError
,
ValueError
),
e
:
except
(
EnvironmentError
,
ValueError
),
e
:
_logger
.
error
((
"ERROR: Not using GPU."
_logger
.
error
((
"ERROR: Not using GPU."
" Initialisation of device
%
i failed:
\n
%
s"
),
" Initialisation of device
%
i failed:
\n
%
s"
),
device
,
e
)
device
,
e
)
cuda_enabled
=
False
cuda_enabled
=
False
if
force
:
if
force
:
e
.
args
+=
((
"You asked to force this device and it failed."
e
.
args
+=
((
"You asked to force this device and it failed."
" No fallback to the cpu or other gpu device."
),)
" No fallback to the cpu or other gpu device."
),)
raise
raise
elif
use
.
device_number
!=
device
:
elif
use
.
device_number
!=
device
:
...
@@ -264,17 +293,16 @@ def use(device,
...
@@ -264,17 +293,16 @@ def use(device,
try
:
try
:
#in case the device is just gpu,
#in case the device is just gpu,
# we check that the driver initialized it correctly.
# we check that the driver initialized it correctly.
cuda_ndarray
.
cuda_ndarray
.
CudaNdarray
.
zeros
((
5
,
5
))
cuda_ndarray
.
cuda_ndarray
.
CudaNdarray
.
zeros
((
5
,
5
))
except
(
Exception
,
NameError
),
e
:
except
(
Exception
,
NameError
),
e
:
# NameError when no gpu present as cuda_ndarray is not loaded.
# NameError when no gpu present as cuda_ndarray is not loaded.
e
.
args
+=
(
"ERROR: GPU forced but failed. "
,)
e
.
args
+=
(
"ERROR: GPU forced but failed. "
,)
raise
raise
use
.
device_number
=
None
use
.
device_number
=
None
def
handle_shared_float32
(
tf
):
def
handle_shared_float32
(
tf
):
"""Set the
CudaNdarrayType as the default handler for shared float32 arrays.
"""Set the
default shared type for float32 tensor to CudaNdarrayType
This function is intended to be called from use(gpu_index), not directly.
This function is intended to be called from use(gpu_index), not directly.
"""
"""
...
@@ -285,11 +313,14 @@ def handle_shared_float32(tf):
...
@@ -285,11 +313,14 @@ def handle_shared_float32(tf):
else
:
else
:
raise
NotImplementedError
(
'removing our handler'
)
raise
NotImplementedError
(
'removing our handler'
)
# We can't test the driver during import here, as this causes a circular
# import dependency. So we also test it in the file theano/__init__.py
if
config
.
device
.
startswith
(
'gpu'
):
if
config
.
device
.
startswith
(
'gpu'
):
use
(
device
=
config
.
device
,
force
=
config
.
force_device
)
use
(
device
=
config
.
device
,
force
=
config
.
force_device
,
test_driver
=
False
)
elif
config
.
init_gpu_device
:
elif
config
.
init_gpu_device
:
assert
config
.
device
==
"cpu"
,
(
"We can use the Theano flag init_gpu_device"
assert
config
.
device
==
"cpu"
,
(
" only when the Theano flag device=='cpu'"
)
"We can use the Theano flag init_gpu_device"
" only when the Theano flag device=='cpu'"
)
_logger
.
warning
((
"GPU device
%
s will be initialized, and used if a GPU is "
_logger
.
warning
((
"GPU device
%
s will be initialized, and used if a GPU is "
"needed. "
"needed. "
"However, no computation, nor shared variables, will be implicitly "
"However, no computation, nor shared variables, will be implicitly "
...
@@ -300,4 +331,4 @@ elif config.init_gpu_device:
...
@@ -300,4 +331,4 @@ elif config.init_gpu_device:
force
=
config
.
force_device
,
force
=
config
.
force_device
,
default_to_move_computation_to_gpu
=
False
,
default_to_move_computation_to_gpu
=
False
,
move_shared_float32_to_gpu
=
False
,
move_shared_float32_to_gpu
=
False
,
enable_cuda
=
False
)
enable_cuda
=
False
,
test_driver
=
False
)
theano/sandbox/cuda/basic_ops.py
浏览文件 @
8aa08ca2
...
@@ -33,7 +33,20 @@ def as_cuda_array(obj):
...
@@ -33,7 +33,20 @@ def as_cuda_array(obj):
else
:
else
:
raise
TypeError
(
"Don't know how to cast to a CudaNdarray object"
)
raise
TypeError
(
"Don't know how to cast to a CudaNdarray object"
)
class
HostFromGpu
(
Op
):
class
GpuOp
(
Op
):
def
make_thunk
(
self
,
node
,
storage_map
,
compute_map
,
no_recycling
):
if
theano
.
sandbox
.
cuda
.
use
.
device_number
is
None
:
theano
.
sandbox
.
cuda
.
use
(
"gpu"
,
force
=
True
,
default_to_move_computation_to_gpu
=
False
,
move_shared_float32_to_gpu
=
False
,
enable_cuda
=
False
)
return
super
(
GpuOp
,
self
)
.
make_thunk
(
node
,
storage_map
,
compute_map
,
no_recycling
)
class
HostFromGpu
(
GpuOp
):
"""
"""
Implement the transfer from gpu to the cpu.
Implement the transfer from gpu to the cpu.
"""
"""
...
@@ -65,7 +78,7 @@ class HostFromGpu(Op):
...
@@ -65,7 +78,7 @@ class HostFromGpu(Op):
return
xshp
return
xshp
host_from_gpu
=
HostFromGpu
()
host_from_gpu
=
HostFromGpu
()
class
GpuFromHost
(
Op
):
class
GpuFromHost
(
Gpu
Op
):
"""
"""
Implement the transfer from cpu to the gpu.
Implement the transfer from cpu to the gpu.
"""
"""
...
@@ -98,7 +111,8 @@ class GpuFromHost(Op):
...
@@ -98,7 +111,8 @@ class GpuFromHost(Op):
return
xshp
return
xshp
gpu_from_host
=
GpuFromHost
()
gpu_from_host
=
GpuFromHost
()
class
GpuElemwise
(
Op
):
class
GpuElemwise
(
GpuOp
):
"""
"""
Implement a generic elemwise on the gpu.
Implement a generic elemwise on the gpu.
"""
"""
...
@@ -208,7 +222,7 @@ class GpuElemwise(Op):
...
@@ -208,7 +222,7 @@ class GpuElemwise(Op):
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
self
.
src_generator
.
cache_version
return
self
.
src_generator
.
cache_version
class
GpuDimShuffle
(
Op
):
class
GpuDimShuffle
(
Gpu
Op
):
"""
"""
Implement DimShuffle on the gpu.
Implement DimShuffle on the gpu.
"""
"""
...
@@ -397,7 +411,7 @@ class GpuDimShuffle(Op):
...
@@ -397,7 +411,7 @@ class GpuDimShuffle(Op):
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
1
,
0
)
return
(
1
,
0
)
class
GpuSum
(
Op
):
class
GpuSum
(
Gpu
Op
):
"""GpuSum is a Reduction along some dimensions by summation.
"""GpuSum is a Reduction along some dimensions by summation.
The dimensions along which to sum is specified by the `reduce_mask` that you pass to the
The dimensions along which to sum is specified by the `reduce_mask` that you pass to the
...
@@ -1717,7 +1731,7 @@ class GpuSum(Op):
...
@@ -1717,7 +1731,7 @@ class GpuSum(Op):
"""
%
locals
()
"""
%
locals
()
return
sio
.
getvalue
()
return
sio
.
getvalue
()
class
GpuReshape
(
tensor
.
Reshape
):
class
GpuReshape
(
tensor
.
Reshape
,
GpuOp
):
"""
"""
Implement Reshape on the gpu.
Implement Reshape on the gpu.
"""
"""
...
@@ -1733,7 +1747,7 @@ class GpuReshape(tensor.Reshape):
...
@@ -1733,7 +1747,7 @@ class GpuReshape(tensor.Reshape):
', should be
%
i'
%
(
len
(
shp
),
self
.
ndim
),
shp
)
', should be
%
i'
%
(
len
(
shp
),
self
.
ndim
),
shp
)
out
[
0
]
=
x
.
reshape
(
tuple
(
shp
))
out
[
0
]
=
x
.
reshape
(
tuple
(
shp
))
class
GpuSubtensor
(
tensor
.
Subtensor
):
class
GpuSubtensor
(
tensor
.
Subtensor
,
GpuOp
):
"""
"""
Implement subtensor on the gpu.
Implement subtensor on the gpu.
"""
"""
...
@@ -1764,7 +1778,7 @@ class GpuSubtensor(tensor.Subtensor):
...
@@ -1764,7 +1778,7 @@ class GpuSubtensor(tensor.Subtensor):
cdata
=
cdata
[
0
]
cdata
=
cdata
[
0
]
out
[
0
]
=
x
.
__getitem__
(
cdata
)
out
[
0
]
=
x
.
__getitem__
(
cdata
)
class
GpuAdvancedSubtensor1
(
tensor
.
AdvancedSubtensor1
):
class
GpuAdvancedSubtensor1
(
tensor
.
AdvancedSubtensor1
,
GpuOp
):
"""
"""
Implement AdvancedSubtensor1 on the gpu.
Implement AdvancedSubtensor1 on the gpu.
"""
"""
...
@@ -1790,7 +1804,7 @@ class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1):
...
@@ -1790,7 +1804,7 @@ class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1):
o
[
j
]
=
x
[
i
]
o
[
j
]
=
x
[
i
]
out
[
0
]
=
o
out
[
0
]
=
o
class
GpuAdvancedIncSubtensor1
(
tensor
.
AdvancedIncSubtensor1
):
class
GpuAdvancedIncSubtensor1
(
tensor
.
AdvancedIncSubtensor1
,
GpuOp
):
"""
"""
Implement AdvancedIncSubtensor1 on the gpu.
Implement AdvancedIncSubtensor1 on the gpu.
"""
"""
...
@@ -1818,7 +1832,7 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1):
...
@@ -1818,7 +1832,7 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1):
# CudaNdarray_Subscript() doesn't support advanced slicing,
# CudaNdarray_Subscript() doesn't support advanced slicing,
# so we use the parent version that loops over each index.
# so we use the parent version that loops over each index.
class
GpuIncSubtensor
(
tensor
.
IncSubtensor
):
class
GpuIncSubtensor
(
tensor
.
IncSubtensor
,
GpuOp
):
"""
"""
Implement IncSubtensor on the gpu.
Implement IncSubtensor on the gpu.
"""
"""
...
@@ -1828,7 +1842,7 @@ class GpuIncSubtensor(tensor.IncSubtensor):
...
@@ -1828,7 +1842,7 @@ class GpuIncSubtensor(tensor.IncSubtensor):
rval
=
tensor
.
IncSubtensor
.
make_node
(
self
,
x
,
y
,
*
inputs
)
rval
=
tensor
.
IncSubtensor
.
make_node
(
self
,
x
,
y
,
*
inputs
)
return
Apply
(
self
,
[
x
,
y
]
+
rval
.
inputs
[
2
:],
[
x
.
type
()])
return
Apply
(
self
,
[
x
,
y
]
+
rval
.
inputs
[
2
:],
[
x
.
type
()])
class
GpuFlatten
(
tensor
.
Flatten
):
class
GpuFlatten
(
tensor
.
Flatten
,
GpuOp
):
"""
"""
Implement Flatten on the gpu.
Implement Flatten on the gpu.
"""
"""
...
@@ -1839,7 +1853,7 @@ class GpuFlatten(tensor.Flatten):
...
@@ -1839,7 +1853,7 @@ class GpuFlatten(tensor.Flatten):
out_type
=
CudaNdarrayType
(
broadcastable
=
host_out_broadcastable
)
out_type
=
CudaNdarrayType
(
broadcastable
=
host_out_broadcastable
)
return
Apply
(
self
,
[
x
],
[
out_type
()])
return
Apply
(
self
,
[
x
],
[
out_type
()])
class
GpuShape
(
tensor
.
Shape
):
class
GpuShape
(
tensor
.
Shape
,
GpuOp
):
"""
"""
Implement Shape on the gpu.
Implement Shape on the gpu.
"""
"""
...
@@ -1847,7 +1861,7 @@ class GpuShape(tensor.Shape):
...
@@ -1847,7 +1861,7 @@ class GpuShape(tensor.Shape):
return
Apply
(
self
,
[
x
],
[
tensor
.
lvector
()])
return
Apply
(
self
,
[
x
],
[
tensor
.
lvector
()])
gpu_shape
=
GpuShape
()
gpu_shape
=
GpuShape
()
class
GpuJoin
(
tensor
.
Join
):
class
GpuJoin
(
tensor
.
Join
,
GpuOp
):
"""
"""
Implement Join on the gpu.
Implement Join on the gpu.
"""
"""
...
@@ -1924,7 +1938,7 @@ class GpuJoin(tensor.Join):
...
@@ -1924,7 +1938,7 @@ class GpuJoin(tensor.Join):
gpu_join
=
GpuJoin
()
gpu_join
=
GpuJoin
()
class
GpuAlloc
(
Op
):
class
GpuAlloc
(
Gpu
Op
):
"""
"""
Implement Alloc on the gpu.
Implement Alloc on the gpu.
"""
"""
...
@@ -2023,7 +2037,7 @@ class GpuAlloc(Op):
...
@@ -2023,7 +2037,7 @@ class GpuAlloc(Op):
gpu_alloc
=
GpuAlloc
()
gpu_alloc
=
GpuAlloc
()
class
GpuContiguous
(
Op
):
class
GpuContiguous
(
Gpu
Op
):
"""
"""
Always return a c contiguous output. Copy the input only if it is
Always return a c contiguous output. Copy the input only if it is
not already c contiguous.
not already c contiguous.
...
...
theano/sandbox/cuda/blas.py
浏览文件 @
8aa08ca2
...
@@ -4,8 +4,9 @@ import StringIO, os
...
@@ -4,8 +4,9 @@ import StringIO, os
import
cuda_ndarray.cuda_ndarray
as
cuda
import
cuda_ndarray.cuda_ndarray
as
cuda
from
theano.sandbox.cuda.type
import
CudaNdarrayType
from
theano.sandbox.cuda.type
import
CudaNdarrayType
from
theano.sandbox.cuda
import
GpuOp
class
GpuDot22
(
Op
):
class
GpuDot22
(
Gpu
Op
):
"""
"""
Implement dot(2d, 2d) on the gpu.
Implement dot(2d, 2d) on the gpu.
"""
"""
...
@@ -76,7 +77,7 @@ class GpuDot22(Op):
...
@@ -76,7 +77,7 @@ class GpuDot22(Op):
"""
%
locals
()
"""
%
locals
()
gpu_dot22
=
GpuDot22
()
gpu_dot22
=
GpuDot22
()
class
GpuDot22Scalar
(
Op
):
class
GpuDot22Scalar
(
Gpu
Op
):
"""
"""
Implement dot(2d, 2d) * scalar on the gpu.
Implement dot(2d, 2d) * scalar on the gpu.
"""
"""
...
@@ -155,7 +156,7 @@ class GpuDot22Scalar(Op):
...
@@ -155,7 +156,7 @@ class GpuDot22Scalar(Op):
"""
%
locals
()
"""
%
locals
()
gpu_dot22scalar
=
GpuDot22Scalar
()
gpu_dot22scalar
=
GpuDot22Scalar
()
class
GpuGemm
(
Op
):
class
GpuGemm
(
Gpu
Op
):
"""
"""
implement the gemm on the gpu.
implement the gemm on the gpu.
...
@@ -257,7 +258,7 @@ class GpuGemm(Op):
...
@@ -257,7 +258,7 @@ class GpuGemm(Op):
gpu_gemm_no_inplace
=
GpuGemm
(
inplace
=
False
)
gpu_gemm_no_inplace
=
GpuGemm
(
inplace
=
False
)
gpu_gemm_inplace
=
GpuGemm
(
inplace
=
True
)
gpu_gemm_inplace
=
GpuGemm
(
inplace
=
True
)
class
GpuGemv
(
Op
):
class
GpuGemv
(
Gpu
Op
):
"""
"""
implement gemv on the gpu.
implement gemv on the gpu.
...
@@ -348,7 +349,7 @@ class GpuGemv(Op):
...
@@ -348,7 +349,7 @@ class GpuGemv(Op):
gpu_gemv_no_inplace
=
GpuGemv
(
inplace
=
False
)
gpu_gemv_no_inplace
=
GpuGemv
(
inplace
=
False
)
gpu_gemv_inplace
=
GpuGemv
(
inplace
=
True
)
gpu_gemv_inplace
=
GpuGemv
(
inplace
=
True
)
class
GpuGer
(
Op
):
class
GpuGer
(
Gpu
Op
):
"""
"""
implement ger on the gpu.
implement ger on the gpu.
...
@@ -439,7 +440,7 @@ class GpuGer(Op):
...
@@ -439,7 +440,7 @@ class GpuGer(Op):
gpu_ger_no_inplace
=
GpuGer
(
inplace
=
False
)
gpu_ger_no_inplace
=
GpuGer
(
inplace
=
False
)
gpu_ger_inplace
=
GpuGer
(
inplace
=
True
)
gpu_ger_inplace
=
GpuGer
(
inplace
=
True
)
class
GpuOuter
(
Op
):
class
GpuOuter
(
Gpu
Op
):
""" Implement outer on the gpu."""
""" Implement outer on the gpu."""
def
make_node
(
self
,
x
,
y
):
def
make_node
(
self
,
x
,
y
):
# we suppose type checking has been done, but make sure.
# we suppose type checking has been done, but make sure.
...
@@ -532,7 +533,7 @@ gpu_outer = GpuOuter()
...
@@ -532,7 +533,7 @@ gpu_outer = GpuOuter()
##
##
# Not really a BLAS operation, but whatever.
# Not really a BLAS operation, but whatever.
#
#
class
GpuConv
(
Op
):
class
GpuConv
(
Gpu
Op
):
"""
"""
Implement the batched and stacked 2d convolution on the gpu.
Implement the batched and stacked 2d convolution on the gpu.
"""
"""
...
@@ -698,7 +699,7 @@ class GpuConv(Op):
...
@@ -698,7 +699,7 @@ class GpuConv(Op):
"""
%
sub
"""
%
sub
class
GpuDownsampleFactorMax
(
Op
):
class
GpuDownsampleFactorMax
(
Gpu
Op
):
"""
"""
Implement downsample with max on the gpu.
Implement downsample with max on the gpu.
"""
"""
...
@@ -858,7 +859,7 @@ class GpuDownsampleFactorMax(Op):
...
@@ -858,7 +859,7 @@ class GpuDownsampleFactorMax(Op):
}
}
"""
%
locals
()
"""
%
locals
()
class
GpuDownsampleFactorMaxGrad
(
Op
):
class
GpuDownsampleFactorMaxGrad
(
Gpu
Op
):
"""
"""
Implement the grad of downsample with max on the gpu.
Implement the grad of downsample with max on the gpu.
"""
"""
...
...
theano/sandbox/cuda/nnet.py
浏览文件 @
8aa08ca2
...
@@ -3,11 +3,12 @@ from theano import tensor, scalar
...
@@ -3,11 +3,12 @@ from theano import tensor, scalar
import
StringIO
import
StringIO
from
theano.sandbox.cuda.type
import
CudaNdarrayType
from
theano.sandbox.cuda.type
import
CudaNdarrayType
from
theano.sandbox.cuda
import
GpuOp
from
theano.sandbox.cuda.kernel_codegen
import
nvcc_kernel
,
inline_reduce_max
,
inline_reduce_sum
,
inline_softmax
from
theano.sandbox.cuda.kernel_codegen
import
nvcc_kernel
,
inline_reduce_max
,
inline_reduce_sum
,
inline_softmax
class
GpuCrossentropySoftmaxArgmax1HotWithBias
(
Op
):
class
GpuCrossentropySoftmaxArgmax1HotWithBias
(
Gpu
Op
):
"""
"""
Implement CrossentropySoftmaxArgmax1HotWithBias on the gpu.
Implement CrossentropySoftmaxArgmax1HotWithBias on the gpu.
"""
"""
...
@@ -180,7 +181,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias (Op):
...
@@ -180,7 +181,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias (Op):
gpu_crossentropy_softmax_argmax_1hot_with_bias
=
GpuCrossentropySoftmaxArgmax1HotWithBias
()
gpu_crossentropy_softmax_argmax_1hot_with_bias
=
GpuCrossentropySoftmaxArgmax1HotWithBias
()
class
GpuCrossentropySoftmax1HotWithBiasDx
(
Op
):
class
GpuCrossentropySoftmax1HotWithBiasDx
(
Gpu
Op
):
"""
"""
Implement CrossentropySoftmax1HotWithBiasDx on the gpu.
Implement CrossentropySoftmax1HotWithBiasDx on the gpu.
"""
"""
...
@@ -302,7 +303,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx (Op):
...
@@ -302,7 +303,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx (Op):
gpu_crossentropy_softmax_1hot_with_bias_dx
=
GpuCrossentropySoftmax1HotWithBiasDx
()
gpu_crossentropy_softmax_1hot_with_bias_dx
=
GpuCrossentropySoftmax1HotWithBiasDx
()
class
GpuSoftmax
(
Op
):
class
GpuSoftmax
(
Gpu
Op
):
"""
"""
Implement Softmax on the gpu.
Implement Softmax on the gpu.
"""
"""
...
@@ -400,7 +401,7 @@ class GpuSoftmax (Op):
...
@@ -400,7 +401,7 @@ class GpuSoftmax (Op):
gpu_softmax
=
GpuSoftmax
()
gpu_softmax
=
GpuSoftmax
()
class
GpuSoftmaxWithBias
(
Op
):
class
GpuSoftmaxWithBias
(
Gpu
Op
):
"""
"""
Implement SoftmaxWithBias on the gpu.
Implement SoftmaxWithBias on the gpu.
"""
"""
...
...
theano/sandbox/cuda/rng_curand.py
浏览文件 @
8aa08ca2
...
@@ -10,7 +10,7 @@ __contact__ = "theano-dev@googlegroups.com"
...
@@ -10,7 +10,7 @@ __contact__ = "theano-dev@googlegroups.com"
import
sys
import
sys
import
numpy
import
numpy
import
theano.gof
import
theano.gof
from
theano.sandbox.cuda
import
CudaNdarrayType
from
theano.sandbox.cuda
import
CudaNdarrayType
,
GpuOp
from
theano.tensor
import
(
get_vector_length
,
cast
,
opt
)
from
theano.tensor
import
(
get_vector_length
,
cast
,
opt
)
from
theano.compile
import
optdb
from
theano.compile
import
optdb
from
theano.gof
import
local_optimizer
,
Variable
from
theano.gof
import
local_optimizer
,
Variable
...
@@ -19,7 +19,7 @@ from theano.gof import local_optimizer, Variable
...
@@ -19,7 +19,7 @@ from theano.gof import local_optimizer, Variable
config
=
theano
.
config
config
=
theano
.
config
class
CURAND_Base
(
theano
.
gof
.
Op
):
class
CURAND_Base
(
Gpu
Op
):
""" Base class for a random number generator implemented in CURAND.
""" Base class for a random number generator implemented in CURAND.
The random number generator itself is an opaque reference managed by
The random number generator itself is an opaque reference managed by
...
...
theano/sandbox/cuda/tests/test_basic_ops.py
浏览文件 @
8aa08ca2
...
@@ -19,23 +19,28 @@ import theano.sandbox.cuda.basic_ops as B
...
@@ -19,23 +19,28 @@ import theano.sandbox.cuda.basic_ops as B
from
theano.tensor.basic
import
_allclose
from
theano.tensor.basic
import
_allclose
from
theano.tests
import
unittest_tools
as
utt
from
theano.tests
import
unittest_tools
as
utt
if
theano
.
config
.
mode
==
'FAST_COMPILE'
:
if
theano
.
config
.
mode
==
'FAST_COMPILE'
:
mode_with_gpu
=
theano
.
compile
.
mode
.
get_mode
(
'FAST_RUN'
)
.
including
(
'gpu'
)
mode_with_gpu
=
theano
.
compile
.
mode
.
get_mode
(
'FAST_RUN'
)
.
including
(
'gpu'
)
mode_without_gpu
=
theano
.
compile
.
mode
.
get_mode
(
'FAST_RUN'
)
.
excluding
(
'gpu'
)
mode_without_gpu
=
theano
.
compile
.
mode
.
get_mode
(
'FAST_RUN'
)
.
excluding
(
'gpu'
)
else
:
else
:
mode_with_gpu
=
theano
.
compile
.
mode
.
get_default_mode
()
.
including
(
'gpu'
)
mode_with_gpu
=
theano
.
compile
.
mode
.
get_default_mode
()
.
including
(
'gpu'
)
mode_without_gpu
=
theano
.
compile
.
mode
.
get_default_mode
()
.
excluding
(
'gpu'
)
mode_without_gpu
=
theano
.
compile
.
mode
.
get_default_mode
()
.
excluding
(
'gpu'
)
def
rand_cuda_ndarray
(
shape
):
def
rand_cuda_ndarray
(
shape
):
return
cuda_ndarray
.
CudaNdarray
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
))
return
cuda_ndarray
.
CudaNdarray
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
))
#intentionally disabled
#intentionally disabled
def
tes_use
():
def
tes_use
():
tcn
.
use
()
tcn
.
use
()
def
test_sum
():
def
test_sum
():
"""
"""
test sum pattern 1, 11, 10, 01, 001, 010, 100, 110, 011, 111, 0011, 0101, 0111, 1011, 1111
test sum pattern 1, 11, 10, 01, 001, 010, 100, 110, 011, 111,
0011, 0101, 0111, 1011, 1111
test sum pattern implemented with reshape:
test sum pattern implemented with reshape:
1000, 0100, 0010, 0001, 11111
1000, 0100, 0010, 0001, 11111
...
@@ -91,18 +96,18 @@ def test_sum():
...
@@ -91,18 +96,18 @@ def test_sum():
((
1100
,
2
,
3
,
4
,
5
),[
0
,
1
,
2
,
3
,
4
]),((
2
,
1100
,
3
,
4
,
5
),[
0
,
1
,
2
,
3
,
4
]),((
2
,
3
,
1100
,
4
,
5
),[
0
,
1
,
2
,
3
,
4
]),((
2
,
3
,
4
,
1100
,
5
),[
0
,
1
,
2
,
3
,
4
]),((
2
,
3
,
4
,
5
,
1100
),[
0
,
1
,
2
,
3
,
4
]),
#11111
((
1100
,
2
,
3
,
4
,
5
),[
0
,
1
,
2
,
3
,
4
]),((
2
,
1100
,
3
,
4
,
5
),[
0
,
1
,
2
,
3
,
4
]),((
2
,
3
,
1100
,
4
,
5
),[
0
,
1
,
2
,
3
,
4
]),((
2
,
3
,
4
,
1100
,
5
),[
0
,
1
,
2
,
3
,
4
]),((
2
,
3
,
4
,
5
,
1100
),[
0
,
1
,
2
,
3
,
4
]),
#11111
]:
]:
a
=
tensor
.
TensorType
(
'float32'
,
(
False
,)
*
len
(
shape
))()
a
=
tensor
.
TensorType
(
'float32'
,
(
False
,)
*
len
(
shape
))()
b
=
T
.
Sum
(
pattern
)(
a
)
b
=
T
.
Sum
(
pattern
)(
a
)
val
=
numpy
.
random
.
rand
(
numpy
.
prod
(
shape
))
.
reshape
(
shape
)
val
=
numpy
.
random
.
rand
(
numpy
.
prod
(
shape
))
.
reshape
(
shape
)
# val = numpy.ones(shape)
# val = numpy.ones(shape)
# val = numpy.arange(numpy.prod(shape)).reshape(shape)
# val = numpy.arange(numpy.prod(shape)).reshape(shape)
val
=
theano
.
_asarray
(
val
,
dtype
=
'float32'
)
val
=
theano
.
_asarray
(
val
,
dtype
=
'float32'
)
f
=
theano
.
function
([
a
],
b
,
mode
=
mode_with_gpu
)
f
=
theano
.
function
([
a
],
b
,
mode
=
mode_with_gpu
)
f2
=
theano
.
function
([
a
],
b
,
mode
=
mode_without_gpu
)
f2
=
theano
.
function
([
a
],
b
,
mode
=
mode_without_gpu
)
assert
tcn
.
GpuSum
in
[
x
.
op
.
__class__
for
x
in
f
.
maker
.
env
.
toposort
()]
assert
tcn
.
GpuSum
in
[
x
.
op
.
__class__
for
x
in
f
.
maker
.
env
.
toposort
()]
assert
T
.
Sum
in
[
x
.
op
.
__class__
for
x
in
f2
.
maker
.
env
.
toposort
()]
assert
T
.
Sum
in
[
x
.
op
.
__class__
for
x
in
f2
.
maker
.
env
.
toposort
()]
if
val
.
size
==
0
:
if
val
.
size
==
0
:
assert
f2
(
val
)
==
f
(
val
),
(
'shape'
,
shape
,
'pattern'
,
pattern
)
assert
f2
(
val
)
==
f
(
val
),
(
'shape'
,
shape
,
'pattern'
,
pattern
)
else
:
else
:
try
:
try
:
#We raise the error threashold as we sum big matrix
#We raise the error threashold as we sum big matrix
...
@@ -110,7 +115,9 @@ def test_sum():
...
@@ -110,7 +115,9 @@ def test_sum():
#example in debug mode with unittests.rseed=9275
#example in debug mode with unittests.rseed=9275
orig_rtol
=
theano
.
tensor
.
basic
.
float32_rtol
orig_rtol
=
theano
.
tensor
.
basic
.
float32_rtol
theano
.
tensor
.
basic
.
float32_rtol
=
2e-5
theano
.
tensor
.
basic
.
float32_rtol
=
2e-5
assert
_allclose
(
f2
(
val
),
f
(
val
)),
(
'shape'
,
shape
,
'pattern'
,
pattern
,
sum
([
shape
[
i
]
for
i
in
pattern
]))
assert
_allclose
(
f2
(
val
),
f
(
val
)),
(
'shape'
,
shape
,
'pattern'
,
pattern
,
sum
([
shape
[
i
]
for
i
in
pattern
]))
finally
:
finally
:
theano
.
tensor
.
basic
.
float32_rtol
=
orig_rtol
theano
.
tensor
.
basic
.
float32_rtol
=
orig_rtol
...
@@ -121,21 +128,23 @@ def test_sum():
...
@@ -121,21 +128,23 @@ def test_sum():
((
5
,
4
),[
0
,
1
]),((
5
,
4
),[
0
]),
((
5
,
4
),[
0
,
1
]),((
5
,
4
),[
0
]),
((
5
,
4
,
3
),[
0
]),((
5
,
4
,
3
),[
0
,
1
]),((
5
,
4
,
3
),[
2
]),((
5
,
4
,
3
),[
0
,
1
,
2
]),
((
5
,
4
,
3
),[
0
]),((
5
,
4
,
3
),[
0
,
1
]),((
5
,
4
,
3
),[
2
]),((
5
,
4
,
3
),[
0
,
1
,
2
]),
((
5
,
4
,
3
,
2
),[
0
,
1
,
2
,
3
]),
((
5
,
4
,
3
,
2
),[
0
,
2
,
3
])]:
((
5
,
4
,
3
,
2
),[
0
,
1
,
2
,
3
]),
((
5
,
4
,
3
,
2
),[
0
,
2
,
3
])]:
a
=
tensor
.
TensorType
(
'float32'
,
(
False
,)
*
len
(
shape
))()
a
=
tensor
.
TensorType
(
'float32'
,
(
False
,)
*
len
(
shape
))()
dim_pattern
=
range
(
len
(
shape
))
dim_pattern
=
range
(
len
(
shape
))
dim_pattern
[
0
]
=
1
dim_pattern
[
0
]
=
1
dim_pattern
[
1
]
=
0
dim_pattern
[
1
]
=
0
a
=
a
.
dimshuffle
(
dim_pattern
)
a
=
a
.
dimshuffle
(
dim_pattern
)
b
=
T
.
Sum
(
pattern
)(
a
)
b
=
T
.
Sum
(
pattern
)(
a
)
val
=
numpy
.
random
.
rand
(
numpy
.
prod
(
shape
))
.
reshape
(
shape
)
val
=
numpy
.
random
.
rand
(
numpy
.
prod
(
shape
))
.
reshape
(
shape
)
# val = numpy.ones(shape)
# val = numpy.ones(shape)
# val = numpy.arange(numpy.prod(shape)).reshape(shape)
# val = numpy.arange(numpy.prod(shape)).reshape(shape)
val
=
theano
.
_asarray
(
val
,
dtype
=
'float32'
)
val
=
theano
.
_asarray
(
val
,
dtype
=
'float32'
)
f
=
theano
.
function
([
a
],
b
,
mode
=
mode_with_gpu
)
f
=
theano
.
function
([
a
],
b
,
mode
=
mode_with_gpu
)
f2
=
theano
.
function
([
a
],
b
,
mode
=
mode_without_gpu
)
f2
=
theano
.
function
([
a
],
b
,
mode
=
mode_without_gpu
)
assert
tcn
.
GpuSum
in
[
x
.
op
.
__class__
for
x
in
f
.
maker
.
env
.
toposort
()]
assert
tcn
.
GpuSum
in
[
x
.
op
.
__class__
for
x
in
f
.
maker
.
env
.
toposort
()]
assert
T
.
Sum
in
[
x
.
op
.
__class__
for
x
in
f2
.
maker
.
env
.
toposort
()]
assert
T
.
Sum
in
[
x
.
op
.
__class__
for
x
in
f2
.
maker
.
env
.
toposort
()]
assert
_allclose
(
f2
(
val
),
f
(
val
)),
(
'shape'
,
shape
,
'pattern'
,
pattern
,
sum
([
shape
[
i
]
for
i
in
pattern
]))
assert
_allclose
(
f2
(
val
),
f
(
val
)),
(
'shape'
,
shape
,
'pattern'
,
pattern
,
sum
([
shape
[
i
]
for
i
in
pattern
]))
#test with broadcast
#test with broadcast
...
@@ -143,116 +152,135 @@ def test_sum():
...
@@ -143,116 +152,135 @@ def test_sum():
((
5
,
4
),[
0
,
1
]),((
5
,
4
),[
0
]),
((
5
,
4
),[
0
,
1
]),((
5
,
4
),[
0
]),
((
5
,
4
,
3
),[
0
]),((
5
,
4
,
3
),[
0
,
1
]),((
5
,
4
,
3
),[
2
]),((
5
,
4
,
3
),[
0
,
1
,
2
]),
((
5
,
4
,
3
),[
0
]),((
5
,
4
,
3
),[
0
,
1
]),((
5
,
4
,
3
),[
2
]),((
5
,
4
,
3
),[
0
,
1
,
2
]),
((
5
,
4
,
3
,
2
),[
0
,
1
,
2
,
3
]),
((
5
,
4
,
3
,
2
),[
0
,
2
,
3
])]:
((
5
,
4
,
3
,
2
),[
0
,
1
,
2
,
3
]),
((
5
,
4
,
3
,
2
),[
0
,
2
,
3
])]:
shape
=
numpy
.
asarray
(
shape
)
*
2
shape
=
numpy
.
asarray
(
shape
)
*
2
a
=
tensor
.
TensorType
(
'float32'
,
(
False
,)
*
len
(
shape
))()
a
=
tensor
.
TensorType
(
'float32'
,
(
False
,)
*
len
(
shape
))()
a2
=
tcn
.
CudaNdarrayType
((
False
,)
*
len
(
shape
))()
a2
=
tcn
.
CudaNdarrayType
((
False
,)
*
len
(
shape
))()
b
=
T
.
Sum
(
pattern
)(
a
)
b
=
T
.
Sum
(
pattern
)(
a
)
b2
=
T
.
Sum
(
pattern
)(
a2
)
b2
=
T
.
Sum
(
pattern
)(
a2
)
val
=
numpy
.
random
.
rand
(
numpy
.
prod
(
shape
))
.
reshape
(
shape
)
val
=
numpy
.
random
.
rand
(
numpy
.
prod
(
shape
))
.
reshape
(
shape
)
# val = numpy.ones(shape)
# val = numpy.ones(shape)
# val = numpy.arange(numpy.prod(shape)).reshape(shape)
# val = numpy.arange(numpy.prod(shape)).reshape(shape)
val
=
theano
.
_asarray
(
val
,
dtype
=
'float32'
)
val
=
theano
.
_asarray
(
val
,
dtype
=
'float32'
)
val2
=
cuda
.
CudaNdarray
(
val
)
val2
=
cuda
.
CudaNdarray
(
val
)
if
len
(
shape
)
==
1
:
if
len
(
shape
)
==
1
:
val
=
val
[::
2
]
val
=
val
[::
2
]
val2
=
val2
[::
2
]
val2
=
val2
[::
2
]
elif
len
(
shape
)
==
2
:
elif
len
(
shape
)
==
2
:
val
=
val
[::
2
,::
2
]
val
=
val
[::
2
,
::
2
]
val2
=
val2
[::
2
,::
2
]
val2
=
val2
[::
2
,
::
2
]
elif
len
(
shape
)
==
3
:
elif
len
(
shape
)
==
3
:
val
=
val
[::
2
,
::
2
,
::
2
]
val
=
val
[::
2
,
::
2
,
::
2
]
val2
=
val2
[::
2
,
::
2
,
::
2
]
val2
=
val2
[::
2
,
::
2
,
::
2
]
elif
len
(
shape
)
==
4
:
elif
len
(
shape
)
==
4
:
val
=
val
[::
2
,
::
2
,::
2
,
::
2
]
val
=
val
[::
2
,
::
2
,
::
2
,
::
2
]
val2
=
val2
[::
2
,
::
2
,::
2
,
::
2
]
val2
=
val2
[::
2
,
::
2
,
::
2
,
::
2
]
f
=
theano
.
function
([
a
],
b
,
mode
=
mode_without_gpu
)
f
=
theano
.
function
([
a
],
b
,
mode
=
mode_without_gpu
)
f2
=
theano
.
function
([
a2
],
b2
,
mode
=
mode_with_gpu
)
f2
=
theano
.
function
([
a2
],
b2
,
mode
=
mode_with_gpu
)
assert
tcn
.
GpuSum
in
[
x
.
op
.
__class__
for
x
in
f2
.
maker
.
env
.
toposort
()]
assert
tcn
.
GpuSum
in
[
x
.
op
.
__class__
for
x
in
f2
.
maker
.
env
.
toposort
()]
assert
T
.
Sum
in
[
x
.
op
.
__class__
for
x
in
f
.
maker
.
env
.
toposort
()]
assert
T
.
Sum
in
[
x
.
op
.
__class__
for
x
in
f
.
maker
.
env
.
toposort
()]
assert
_allclose
(
f2
(
val2
),
f
(
val
)),
(
'shape'
,
shape
,
'pattern'
,
pattern
,
sum
([
shape
[
i
]
for
i
in
pattern
]))
assert
_allclose
(
f2
(
val2
),
f
(
val
)),
(
'shape'
,
shape
,
'pattern'
,
pattern
,
sum
([
shape
[
i
]
for
i
in
pattern
]))
def
test_flatten
():
def
test_flatten
():
x
=
cuda
.
fmatrix
(
'x'
)
x
=
cuda
.
fmatrix
(
'x'
)
f
=
theano
.
function
([
x
],
x
.
flatten
())
f
=
theano
.
function
([
x
],
x
.
flatten
())
assert
len
(
f
(
[[
0.
,
0.
],[
0.
,
0.
]]
)
.
shape
)
==
1
assert
len
(
f
([[
0.
,
0.
],
[
0.
,
0.
]])
.
shape
)
==
1
def
test_reshape
():
def
test_reshape
():
a
=
tcn
.
CudaNdarrayType
((
False
,))()
a
=
tcn
.
CudaNdarrayType
((
False
,))()
b
=
tcn
.
CudaNdarrayType
((
False
,
False
))()
b
=
tcn
.
CudaNdarrayType
((
False
,
False
))()
c
=
T
.
reshape
(
a
,
[
2
,
3
])
c
=
T
.
reshape
(
a
,
[
2
,
3
])
#basic
#basic
f
=
theano
.
function
([
a
],
c
,
mode
=
mode_without_gpu
)
f
=
theano
.
function
([
a
],
c
,
mode
=
mode_with_gpu
)
fv
=
f
(
cuda_ndarray
.
CudaNdarray
(
theano
.
_asarray
([
0
,
1
,
2
,
3
,
4
,
5
],
dtype
=
'float32'
)))
fv
=
f
(
cuda_ndarray
.
CudaNdarray
(
theano
.
_asarray
([
0
,
1
,
2
,
3
,
4
,
5
],
assert
numpy
.
all
(
fv
==
numpy
.
asarray
([[
0
,
1
,
2
],
[
3
,
4
,
5
]]))
dtype
=
'float32'
)))
topo
=
f
.
maker
.
env
.
toposort
()
assert
any
([
isinstance
(
node
.
op
,
B
.
GpuReshape
)
for
node
in
topo
])
assert
numpy
.
all
(
fv
==
numpy
.
asarray
([[
0
,
1
,
2
],
[
3
,
4
,
5
]]))
#test that it works without inplace operations
#test that it works without inplace operations
a_val
=
cuda_ndarray
.
CudaNdarray
(
theano
.
_asarray
([
0
,
1
,
2
,
3
,
4
,
5
],
dtype
=
'float32'
))
a_val
=
cuda_ndarray
.
CudaNdarray
(
theano
.
_asarray
([
0
,
1
,
2
,
3
,
4
,
5
],
a_val_copy
=
cuda_ndarray
.
CudaNdarray
(
theano
.
_asarray
([
0
,
1
,
2
,
3
,
4
,
5
],
dtype
=
'float32'
))
dtype
=
'float32'
))
b_val
=
cuda_ndarray
.
CudaNdarray
(
theano
.
_asarray
([[
0
,
1
,
2
],[
3
,
4
,
5
]],
dtype
=
'float32'
))
a_val_copy
=
cuda_ndarray
.
CudaNdarray
(
theano
.
_asarray
([
0
,
1
,
2
,
3
,
4
,
5
],
dtype
=
'float32'
))
f_sub
=
theano
.
function
([
a
,
b
],
c
-
b
,
mode
=
mode_without_gpu
)
b_val
=
cuda_ndarray
.
CudaNdarray
(
theano
.
_asarray
([[
0
,
1
,
2
],
[
3
,
4
,
5
]],
dtype
=
'float32'
))
f_sub
=
theano
.
function
([
a
,
b
],
c
-
b
,
mode
=
mode_with_gpu
)
topo
=
f_sub
.
maker
.
env
.
toposort
()
assert
any
([
isinstance
(
node
.
op
,
B
.
GpuReshape
)
for
node
in
topo
])
assert
numpy
.
all
(
f_sub
(
a_val
,
b_val
)
==
0.0
)
assert
numpy
.
all
(
f_sub
(
a_val
,
b_val
)
==
0.0
)
assert
numpy
.
all
(
numpy
.
asarray
(
a_val
)
==
numpy
.
asarray
(
a_val_copy
))
assert
numpy
.
all
(
numpy
.
asarray
(
a_val
)
==
numpy
.
asarray
(
a_val_copy
))
#test that it works with inplace operations
#test that it works with inplace operations
a_val
=
theano
.
_asarray
([
0
,
1
,
2
,
3
,
4
,
5
],
dtype
=
'float32'
)
a_val
=
theano
.
_asarray
([
0
,
1
,
2
,
3
,
4
,
5
],
dtype
=
'float32'
)
a_val_copy
=
theano
.
_asarray
([
0
,
1
,
2
,
3
,
4
,
5
],
dtype
=
'float32'
)
a_val_copy
=
theano
.
_asarray
([
0
,
1
,
2
,
3
,
4
,
5
],
dtype
=
'float32'
)
b_val
=
theano
.
_asarray
([[
0
,
1
,
2
],[
3
,
4
,
5
]],
dtype
=
'float32'
)
b_val
=
theano
.
_asarray
([[
0
,
1
,
2
],
[
3
,
4
,
5
]],
dtype
=
'float32'
)
f_sub
=
theano
.
function
([
a
,
b
],
c
-
b
,
mode
=
mode_without_gpu
)
f_sub
=
theano
.
function
([
a
,
b
],
c
-
b
,
mode
=
mode_with_gpu
)
topo
=
f_sub
.
maker
.
env
.
toposort
()
assert
any
([
isinstance
(
node
.
op
,
B
.
GpuReshape
)
for
node
in
topo
])
assert
numpy
.
all
(
f_sub
(
a_val
,
b_val
)
==
0.0
)
assert
numpy
.
all
(
f_sub
(
a_val
,
b_val
)
==
0.0
)
assert
numpy
.
all
(
numpy
.
asarray
(
a_val
)
==
numpy
.
asarray
(
a_val_copy
))
assert
numpy
.
all
(
numpy
.
asarray
(
a_val
)
==
numpy
.
asarray
(
a_val_copy
))
# verify gradient
# verify gradient
def
just_vals
(
v
):
def
just_vals
(
v
):
return
T
.
Reshape
(
2
)(
v
,
theano
.
_asarray
([
2
,
3
],
dtype
=
'int32'
))
return
T
.
Reshape
(
2
)(
v
,
theano
.
_asarray
([
2
,
3
],
dtype
=
'int32'
))
utt
.
verify_grad
(
just_vals
,
[
a_val
])
utt
.
verify_grad
(
just_vals
,
[
a_val
])
def
test_elemwise_empty
():
def
test_elemwise_empty
():
#test with 0 element
#test with 0 element
a
=
tcn
.
shared_constructor
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
0
,
0
),
dtype
=
'float32'
),
'a'
)
a
=
tcn
.
shared_constructor
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
0
,
0
),
dtype
=
'float32'
),
'a'
)
b
=
tensor
.
fmatrix
()
b
=
tensor
.
fmatrix
()
f
=
pfunc
([
b
],
[],
updates
=
[(
a
,
a
+
b
)],
mode
=
mode_with_gpu
)
f
=
pfunc
([
b
],
[],
updates
=
[(
a
,
a
+
b
)],
mode
=
mode_with_gpu
)
f2
=
pfunc
([
b
],
[],
updates
=
[(
a
,
a
+
b
)],
mode
=
mode_without_gpu
)
f2
=
pfunc
([
b
],
[],
updates
=
[(
a
,
a
+
b
)],
mode
=
mode_without_gpu
)
a0
=
a
.
get_value
()
*
1.0
a0
=
a
.
get_value
()
*
1.0
f
(
numpy
.
ones
((
0
,
0
),
dtype
=
'float32'
))
f
(
numpy
.
ones
((
0
,
0
),
dtype
=
'float32'
))
assert
numpy
.
all
(
a0
+
1.0
==
a
.
get_value
())
assert
numpy
.
all
(
a0
+
1.0
==
a
.
get_value
())
def
test_elemwise0
():
def
test_elemwise0
():
a
=
tcn
.
shared_constructor
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
4
,
4
),
dtype
=
'float32'
),
'a'
)
a
=
tcn
.
shared_constructor
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
4
,
4
),
dtype
=
'float32'
),
'a'
)
b
=
tensor
.
fmatrix
()
b
=
tensor
.
fmatrix
()
f
=
pfunc
([
b
],
[],
updates
=
[(
a
,
a
+
b
)],
mode
=
mode_with_gpu
)
f
=
pfunc
([
b
],
[],
updates
=
[(
a
,
a
+
b
)],
mode
=
mode_with_gpu
)
#check that we work inplace.
#check that we work inplace.
assert
f
.
maker
.
env
.
toposort
()[
1
]
.
op
.
destroy_map
.
items
()
==
[(
0
,
[
0
])]
assert
f
.
maker
.
env
.
toposort
()[
1
]
.
op
.
destroy_map
.
items
()
==
[(
0
,
[
0
])]
a0
=
a
.
get_value
()
*
1.0
a0
=
a
.
get_value
()
*
1.0
print
'BEFORE ADD'
,
a
.
get_value
()
print
'BEFORE ADD'
,
a
.
get_value
()
for
i
,
node
in
enumerate
(
f
.
maker
.
env
.
toposort
()):
for
i
,
node
in
enumerate
(
f
.
maker
.
env
.
toposort
()):
print
i
,
node
print
i
,
node
f
(
numpy
.
ones
((
4
,
4
),
dtype
=
'float32'
))
f
(
numpy
.
ones
((
4
,
4
),
dtype
=
'float32'
))
print
'AFTER ADD'
,
a
.
get_value
()
print
'AFTER ADD'
,
a
.
get_value
()
assert
numpy
.
all
(
a0
+
1.0
==
a
.
get_value
())
assert
numpy
.
all
(
a0
+
1.0
==
a
.
get_value
())
def
test_elemwise_bad_broadcast
():
def
test_elemwise_bad_broadcast
():
x
=
cuda
.
fmatrix
(
'x'
)
x
=
cuda
.
fmatrix
(
'x'
)
y
=
cuda
.
fmatrix
(
'y'
)
y
=
cuda
.
fmatrix
(
'y'
)
f
=
theano
.
function
([
x
,
y
],
x
*
y
,
mode
=
mode_with_gpu
)
f
=
theano
.
function
([
x
,
y
],
x
*
y
,
mode
=
mode_with_gpu
)
print
f
.
maker
.
env
.
toposort
()
print
f
.
maker
.
env
.
toposort
()
assert
len
(
f
.
maker
.
env
.
toposort
())
==
2
assert
len
(
f
.
maker
.
env
.
toposort
())
==
2
assert
isinstance
(
f
.
maker
.
env
.
toposort
()[
0
]
.
op
,
cuda
.
GpuElemwise
)
assert
isinstance
(
f
.
maker
.
env
.
toposort
()[
0
]
.
op
,
cuda
.
GpuElemwise
)
assert
f
.
maker
.
env
.
toposort
()[
1
]
.
op
==
cuda
.
host_from_gpu
assert
f
.
maker
.
env
.
toposort
()[
1
]
.
op
==
cuda
.
host_from_gpu
try
:
try
:
f
(
rand_cuda_ndarray
((
10
,
3
)),
rand_cuda_ndarray
((
10
,
1
)))
f
(
rand_cuda_ndarray
((
10
,
3
)),
rand_cuda_ndarray
((
10
,
1
)))
...
@@ -261,41 +289,48 @@ def test_elemwise_bad_broadcast():
...
@@ -261,41 +289,48 @@ def test_elemwise_bad_broadcast():
else
:
else
:
raise
Exception
(
"Theano should have raised an error"
)
raise
Exception
(
"Theano should have raised an error"
)
def
test_elemwise1
():
def
test_elemwise1
():
""" Several kinds of elemwise expressions with no broadcasting, non power-of-two shape """
""" Several kinds of elemwise expressions with no broadcasting,
non power-of-two shape """
shape
=
(
3
,
4
)
shape
=
(
3
,
4
)
a
=
tcn
.
shared_constructor
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
)
+
0.5
,
'a'
)
a
=
tcn
.
shared_constructor
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
)
+
0.5
,
'a'
)
b
=
tensor
.
fmatrix
()
b
=
tensor
.
fmatrix
()
#let debugmode catch any mistakes
#let debugmode catch any mistakes
print
>>
sys
.
stdout
,
"STARTING FUNCTION 1"
print
>>
sys
.
stdout
,
"STARTING FUNCTION 1"
f
=
pfunc
([
b
],
[],
updates
=
[(
a
,
b
**
a
)],
mode
=
mode_with_gpu
)
f
=
pfunc
([
b
],
[],
updates
=
[(
a
,
b
**
a
)],
mode
=
mode_with_gpu
)
for
i
,
node
in
enumerate
(
f
.
maker
.
env
.
toposort
()):
for
i
,
node
in
enumerate
(
f
.
maker
.
env
.
toposort
()):
print
i
,
node
print
i
,
node
f
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
)
+
0.3
)
f
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
)
+
0.3
)
print
>>
sys
.
stdout
,
"STARTING FUNCTION 2"
print
>>
sys
.
stdout
,
"STARTING FUNCTION 2"
#let debugmode catch any mistakes
#let debugmode catch any mistakes
f
=
pfunc
([
b
],
[],
updates
=
[(
a
,
tensor
.
exp
(
b
**
a
))],
mode
=
mode_with_gpu
)
f
=
pfunc
([
b
],
[],
updates
=
[(
a
,
tensor
.
exp
(
b
**
a
))],
mode
=
mode_with_gpu
)
for
i
,
node
in
enumerate
(
f
.
maker
.
env
.
toposort
()):
for
i
,
node
in
enumerate
(
f
.
maker
.
env
.
toposort
()):
print
i
,
node
print
i
,
node
f
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
)
+
0.3
)
f
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
)
+
0.3
)
print
>>
sys
.
stdout
,
"STARTING FUNCTION 3"
print
>>
sys
.
stdout
,
"STARTING FUNCTION 3"
#let debugmode catch any mistakes
#let debugmode catch any mistakes
f
=
pfunc
([
b
],
[],
updates
=
[(
a
,
a
+
b
*
tensor
.
exp
(
b
**
a
))],
mode
=
mode_with_gpu
)
f
=
pfunc
([
b
],
[],
updates
=
[(
a
,
a
+
b
*
tensor
.
exp
(
b
**
a
))],
f
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
)
+
0.3
)
mode
=
mode_with_gpu
)
f
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
)
+
0.3
)
def
test_elemwise2
():
def
test_elemwise2
():
""" Several kinds of elemwise expressions with dimension permutations """
""" Several kinds of elemwise expressions with dimension permutations """
rng
=
numpy
.
random
.
RandomState
(
int
(
time
.
time
()))
rng
=
numpy
.
random
.
RandomState
(
int
(
time
.
time
()))
print
'random?'
,
rng
.
rand
(
3
)
print
'random?'
,
rng
.
rand
(
3
)
shape
=
(
3
,
5
)
shape
=
(
3
,
5
)
for
pattern
in
[(
0
,
1
),
(
1
,
0
)]:
for
pattern
in
[(
0
,
1
),
(
1
,
0
)]:
a
=
tcn
.
shared_constructor
(
theano
.
_asarray
(
rng
.
rand
(
*
shape
),
dtype
=
'float32'
),
name
=
None
)
a
=
tcn
.
shared_constructor
(
theano
.
_asarray
(
rng
.
rand
(
*
shape
),
b
=
tensor
.
Tensor
(
dtype
=
'float32'
,
broadcastable
=
[
0
]
*
len
(
shape
))()
dtype
=
'float32'
),
name
=
None
)
f
=
pfunc
([
b
],
[],
updates
=
[(
a
,
(
a
+
b
)
.
dimshuffle
(
pattern
))],
mode
=
mode_with_gpu
)
b
=
tensor
.
Tensor
(
dtype
=
'float32'
,
broadcastable
=
[
0
]
*
len
(
shape
))()
f
=
pfunc
([
b
],
[],
updates
=
[(
a
,
(
a
+
b
)
.
dimshuffle
(
pattern
))],
mode
=
mode_with_gpu
)
has_elemwise
=
False
has_elemwise
=
False
for
i
,
node
in
enumerate
(
f
.
maker
.
env
.
toposort
()):
for
i
,
node
in
enumerate
(
f
.
maker
.
env
.
toposort
()):
print
>>
sys
.
stdout
,
i
,
node
print
>>
sys
.
stdout
,
i
,
node
...
@@ -303,34 +338,39 @@ def test_elemwise2():
...
@@ -303,34 +338,39 @@ def test_elemwise2():
assert
not
has_elemwise
assert
not
has_elemwise
#let debugmode catch errors
#let debugmode catch errors
print
>>
sys
.
stdout
,
'pattern'
,
pattern
print
>>
sys
.
stdout
,
'pattern'
,
pattern
f
(
theano
.
_asarray
(
rng
.
rand
(
*
shape
),
dtype
=
'float32'
)
*.
3
)
f
(
theano
.
_asarray
(
rng
.
rand
(
*
shape
),
dtype
=
'float32'
)
*
.
3
)
shape
=
(
3
,
4
,
5
,
6
)
shape
=
(
3
,
4
,
5
,
6
)
a
=
tcn
.
shared_constructor
(
theano
.
_asarray
(
rng
.
rand
(
*
shape
),
dtype
=
'float32'
),
'a'
)
a
=
tcn
.
shared_constructor
(
theano
.
_asarray
(
rng
.
rand
(
*
shape
),
b
=
tensor
.
Tensor
(
dtype
=
'float32'
,
broadcastable
=
[
0
]
*
len
(
shape
))()
dtype
=
'float32'
),
'a'
)
f
=
pfunc
([
b
],
[],
updates
=
[(
a
,
(
a
+
b
)
.
dimshuffle
([
2
,
0
,
3
,
1
])
*
b
=
tensor
.
Tensor
(
dtype
=
'float32'
,
broadcastable
=
[
0
]
*
len
(
shape
))()
tensor
.
exp
(
b
**
a
)
.
dimshuffle
([
2
,
0
,
3
,
1
]))],
mode
=
mode_with_gpu
)
f
=
pfunc
([
b
],
[],
updates
=
[(
a
,
(
a
+
b
)
.
dimshuffle
([
2
,
0
,
3
,
1
])
*
tensor
.
exp
(
b
**
a
)
.
dimshuffle
([
2
,
0
,
3
,
1
]))],
mode
=
mode_with_gpu
)
has_elemwise
=
False
has_elemwise
=
False
for
i
,
node
in
enumerate
(
f
.
maker
.
env
.
toposort
()):
for
i
,
node
in
enumerate
(
f
.
maker
.
env
.
toposort
()):
print
i
,
node
print
i
,
node
has_elemwise
=
has_elemwise
or
isinstance
(
node
.
op
,
tensor
.
Elemwise
)
has_elemwise
=
has_elemwise
or
isinstance
(
node
.
op
,
tensor
.
Elemwise
)
assert
not
has_elemwise
assert
not
has_elemwise
#let debugmode catch errors
#let debugmode catch errors
f
(
theano
.
_asarray
(
rng
.
rand
(
*
shape
),
dtype
=
'float32'
))
f
(
theano
.
_asarray
(
rng
.
rand
(
*
shape
),
dtype
=
'float32'
))
def
test_elemwise3
():
def
test_elemwise3
():
""" Several kinds of elemwise expressions with dimension permutations and broadcasting"""
""" Several kinds of elemwise expressions with dimension
permutations and broadcasting"""
shape
=
(
3
,
4
,
5
,
6
)
shape
=
(
3
,
4
,
5
,
6
)
a
=
tcn
.
shared_constructor
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
),
'a'
)
a
=
tcn
.
shared_constructor
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
),
'a'
)
b
=
tensor
.
fvector
()
b
=
tensor
.
fvector
()
print
b
.
type
print
b
.
type
print
tensor
.
constant
(
1
)
.
type
print
tensor
.
constant
(
1
)
.
type
print
(
1
+
b
)
.
type
print
(
1
+
b
)
.
type
print
(
1
+
b
**
a
)
.
type
print
(
1
+
b
**
a
)
.
type
print
tensor
.
exp
((
1
+
b
**
a
))
.
type
print
tensor
.
exp
((
1
+
b
**
a
))
.
type
f
=
pfunc
([
b
],
[],
updates
=
[(
a
,
(
a
+
b
)
.
dimshuffle
([
2
,
0
,
3
,
1
])
*
tensor
.
exp
(
1
+
new_val
=
(
a
+
b
)
.
dimshuffle
([
2
,
0
,
3
,
1
])
b
**
a
)
.
dimshuffle
([
2
,
0
,
3
,
1
]))],
mode
=
mode_with_gpu
)
new_val
*=
tensor
.
exp
(
1
+
b
**
a
)
.
dimshuffle
([
2
,
0
,
3
,
1
])
f
=
pfunc
([
b
],
[],
updates
=
[(
a
,
new_val
)],
mode
=
mode_with_gpu
)
has_elemwise
=
False
has_elemwise
=
False
for
i
,
node
in
enumerate
(
f
.
maker
.
env
.
toposort
()):
for
i
,
node
in
enumerate
(
f
.
maker
.
env
.
toposort
()):
print
>>
sys
.
stdout
,
i
,
node
print
>>
sys
.
stdout
,
i
,
node
...
@@ -339,75 +379,86 @@ def test_elemwise3():
...
@@ -339,75 +379,86 @@ def test_elemwise3():
#let debugmode catch errors
#let debugmode catch errors
f
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
6
),
dtype
=
'float32'
))
f
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
6
),
dtype
=
'float32'
))
def
test_elemwise4
():
def
test_elemwise4
():
""" Test that two vectors can be broadcast to form an outer product (by performing rank-1 matrix update"""
""" Test that two vectors can be broadcast to form an outer
product (by performing rank-1 matrix update"""
shape
=
(
3
,
4
)
shape
=
(
3
,
4
)
a
=
tcn
.
shared_constructor
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
),
'a'
)
a
=
tcn
.
shared_constructor
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
),
'a'
)
b
=
tensor
.
fvector
()
b
=
tensor
.
fvector
()
c
=
tensor
.
fvector
()
c
=
tensor
.
fvector
()
f
=
pfunc
([
b
,
c
],
[],
updates
=
[(
a
,
(
a
+
b
.
dimshuffle
(
'x'
,
0
)
*
c
.
dimshuffle
(
0
,
'x'
)))],
mode
=
mode_with_gpu
)
f
=
pfunc
([
b
,
c
],
[],
updates
=
[(
a
,
(
a
+
b
.
dimshuffle
(
'x'
,
0
)
*
c
.
dimshuffle
(
0
,
'x'
)))],
mode
=
mode_with_gpu
)
has_elemwise
=
False
has_elemwise
=
False
for
i
,
node
in
enumerate
(
f
.
maker
.
env
.
toposort
()):
for
i
,
node
in
enumerate
(
f
.
maker
.
env
.
toposort
()):
print
>>
sys
.
stdout
,
i
,
node
print
>>
sys
.
stdout
,
i
,
node
has_elemwise
=
has_elemwise
or
isinstance
(
node
.
op
,
tensor
.
Elemwise
)
has_elemwise
=
has_elemwise
or
isinstance
(
node
.
op
,
tensor
.
Elemwise
)
assert
not
has_elemwise
assert
not
has_elemwise
#let debugmode catch errors
#let debugmode catch errors
f
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
4
),
dtype
=
'float32'
),
theano
.
_asarray
(
numpy
.
random
.
rand
(
3
),
dtype
=
'float32'
))
f
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
4
),
dtype
=
'float32'
),
theano
.
_asarray
(
numpy
.
random
.
rand
(
3
),
dtype
=
'float32'
))
def
test_elemwise_comparaison_cast
():
def
test_elemwise_comparaison_cast
():
"""
"""
test if an elemwise comparaison followed by a cast to float32 are pushed to gpu.
test if an elemwise comparaison followed by a cast to float32 are
pushed to gpu.
"""
"""
a
=
tensor
.
fmatrix
()
a
=
tensor
.
fmatrix
()
b
=
tensor
.
fmatrix
()
b
=
tensor
.
fmatrix
()
av
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
4
,
4
),
dtype
=
'float32'
)
av
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
4
,
4
),
dtype
=
'float32'
)
bv
=
numpy
.
ones
((
4
,
4
),
dtype
=
'float32'
)
bv
=
numpy
.
ones
((
4
,
4
),
dtype
=
'float32'
)
for
g
,
ans
in
[(
tensor
.
lt
,
av
<
bv
),
(
tensor
.
gt
,
av
>
bv
),
for
g
,
ans
in
[(
tensor
.
lt
,
av
<
bv
),
(
tensor
.
gt
,
av
>
bv
),
(
tensor
.
le
,
av
<=
bv
),
(
tensor
.
ge
,
av
>=
bv
)]:
(
tensor
.
le
,
av
<=
bv
),
(
tensor
.
ge
,
av
>=
bv
)]:
f
=
pfunc
([
a
,
b
],
tensor
.
cast
(
g
(
a
,
b
),
'float32'
),
mode
=
mode_with_gpu
)
f
=
pfunc
([
a
,
b
],
tensor
.
cast
(
g
(
a
,
b
),
'float32'
),
mode
=
mode_with_gpu
)
#theano.printing.debugprint(f)
#theano.printing.debugprint(f)
out
=
f
(
av
,
bv
)
out
=
f
(
av
,
bv
)
assert
numpy
.
all
(
out
==
ans
)
assert
numpy
.
all
(
out
==
ans
)
assert
any
([
isinstance
(
node
.
op
,
cuda
.
GpuElemwise
)
for
node
in
f
.
maker
.
env
.
toposort
()])
assert
any
([
isinstance
(
node
.
op
,
cuda
.
GpuElemwise
)
#assert any([isinstance(node.op, tensor.Elemwise) for node in f.maker.env.toposort()])
for
node
in
f
.
maker
.
env
.
toposort
()])
def
test_elemwise_composite_float64
():
def
test_elemwise_composite_float64
():
# test that we don't fuse composite elemwise with float64 somewhere inside
# test that we don't fuse composite elemwise with float64 somewhere inside
# nvcc by default downcast them to float32. We would need to tell him not
to
# nvcc by default downcast them to float32. We would need to tell him not
# do so, but that possible only on some device.
#
to
do so, but that possible only on some device.
a
=
tensor
.
fmatrix
()
a
=
tensor
.
fmatrix
()
b
=
tensor
.
fmatrix
()
b
=
tensor
.
fmatrix
()
av
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
4
,
4
),
dtype
=
'float32'
)
av
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
4
,
4
),
dtype
=
'float32'
)
bv
=
numpy
.
ones
((
4
,
4
),
dtype
=
'float32'
)
bv
=
numpy
.
ones
((
4
,
4
),
dtype
=
'float32'
)
def
get_all_basic_scalar
(
composite_op
):
def
get_all_basic_scalar
(
composite_op
):
l
=
[]
l
=
[]
for
i
in
composite_op
.
env
.
toposort
():
for
i
in
composite_op
.
env
.
toposort
():
if
isinstance
(
i
,
theano
.
scalar
.
Composite
):
if
isinstance
(
i
,
theano
.
scalar
.
Composite
):
l
+=
get_all_basic_scalar
(
i
)
l
+=
get_all_basic_scalar
(
i
)
else
:
else
:
l
.
append
(
i
)
l
.
append
(
i
)
return
l
return
l
for
mode
in
[
mode_with_gpu
,
mode_with_gpu
.
excluding
(
'gpu_after_fusion'
),
mode_with_gpu
.
excluding
(
'elemwise_fusion'
)]:
for
mode
in
[
mode_with_gpu
,
mode_with_gpu
.
excluding
(
'gpu_after_fusion'
),
f
=
pfunc
([
a
,
b
],
tensor
.
cast
(
tensor
.
lt
(
tensor
.
cast
(
a
,
'float64'
)
**
2
,
#*numpy.asarray(2, 'float32'),
mode_with_gpu
.
excluding
(
'elemwise_fusion'
)]:
f
=
pfunc
([
a
,
b
],
tensor
.
cast
(
tensor
.
lt
(
tensor
.
cast
(
a
,
'float64'
)
**
2
,
b
),
b
),
'float32'
),
mode
=
mode
)
'float32'
),
mode
=
mode
)
#theano.printing.debugprint(f, print_type=True)
#theano.printing.debugprint(f, print_type=True)
out
=
f
(
av
,
bv
)
out
=
f
(
av
,
bv
)
assert
numpy
.
all
(
out
==
((
av
**
2
)
<
bv
))
assert
numpy
.
all
(
out
==
((
av
**
2
)
<
bv
))
for
node
in
f
.
maker
.
env
.
toposort
():
for
node
in
f
.
maker
.
env
.
toposort
():
if
isinstance
(
node
.
op
,
cuda
.
GpuElemwise
):
if
isinstance
(
node
.
op
,
cuda
.
GpuElemwise
):
if
isinstance
(
node
.
op
.
scalar_op
,
theano
.
scalar
.
Composite
):
if
isinstance
(
node
.
op
.
scalar_op
,
theano
.
scalar
.
Composite
):
scals
=
get_all_basic_scalar
(
node
.
op
.
scalar_op
)
scals
=
get_all_basic_scalar
(
node
.
op
.
scalar_op
)
for
s
in
scals
:
for
s
in
scals
:
assert
not
any
([
i
.
type
.
dtype
==
'float64'
for
i
in
s
.
inputs
+
s
.
outputs
])
assert
not
any
([
i
.
type
.
dtype
==
'float64'
for
i
in
s
.
inputs
+
s
.
outputs
])
def
test_elemwise_composite_support_code
():
def
test_elemwise_composite_support_code
():
...
@@ -443,205 +494,226 @@ def test_elemwise_composite_support_code():
...
@@ -443,205 +494,226 @@ def test_elemwise_composite_support_code():
def
speed_elemwise_collapse
():
def
speed_elemwise_collapse
():
""" used to time if the collapse of ccontiguous dims are useful """
""" used to time if the collapse of ccontiguous dims are useful """
shape
=
(
30
,
40
,
50
,
600
)
shape
=
(
30
,
40
,
50
,
600
)
a
=
cuda_ndarray
.
CudaNdarray
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
))
a
=
cuda_ndarray
.
CudaNdarray
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
a
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
)
dtype
=
'float32'
))
a
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
)
a2
=
tcn
.
shared_constructor
(
a
,
'a'
)
a2
=
tcn
.
shared_constructor
(
a
,
'a'
)
a3
=
a2
[:,
::
2
,:,
:]
a3
=
a2
[:,
::
2
,
:,
:]
b
=
tcn
.
CudaNdarrayType
((
False
,
False
,
False
,
False
))()
b
=
tcn
.
CudaNdarrayType
((
False
,
False
,
False
,
False
))()
c
=
a3
+
b
*
tensor
.
exp
(
1
+
b
**
a3
)
c
=
a3
+
b
*
tensor
.
exp
(
1
+
b
**
a3
)
f
=
pfunc
([
b
],
[
c
],
mode
=
mode_with_gpu
)
f
=
pfunc
([
b
],
[
c
],
mode
=
mode_with_gpu
)
v
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
)
v
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
)
v
=
v
[:,
::
2
,
:,
:]
v
=
v
[:,::
2
,:,:]
v
=
cuda_ndarray
.
CudaNdarray
(
v
)
v
=
cuda_ndarray
.
CudaNdarray
(
v
)
for
id
,
n
in
enumerate
(
f
.
maker
.
env
.
toposort
()):
for
id
,
n
in
enumerate
(
f
.
maker
.
env
.
toposort
()):
print
id
,
n
print
id
,
n
t1
=
time
.
time
()
t1
=
time
.
time
()
for
i
in
range
(
100
):
for
i
in
range
(
100
):
#let debugmode catch errors
#let debugmode catch errors
f
(
v
)
f
(
v
)
t2
=
time
.
time
()
t2
=
time
.
time
()
def
speed_elemwise_collapse2
():
def
speed_elemwise_collapse2
():
""" used to test the speed up of the generalised collapse of ccontiguous dims"""
""" used to test the speed up of the generalised collapse of
ccontiguous dims"""
shape
=
(
30
,
40
,
50
,
600
)
shape
=
(
30
,
40
,
50
,
600
)
a
=
cuda_ndarray
.
CudaNdarray
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
))
a
=
cuda_ndarray
.
CudaNdarray
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
a
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
)
dtype
=
'float32'
))
a
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
)
a2
=
tcn
.
shared_constructor
(
a
,
'a'
)
a2
=
tcn
.
shared_constructor
(
a
,
'a'
)
a3
=
a2
[:,
:,:,
::
2
]
a3
=
a2
[:,
:,
:,
::
2
]
b
=
tcn
.
CudaNdarrayType
((
False
,
False
,
False
,
False
))()
b
=
tcn
.
CudaNdarrayType
((
False
,
False
,
False
,
False
))()
c
=
a3
+
b
*
tensor
.
exp
(
1
+
b
**
a3
)
c
=
a3
+
b
*
tensor
.
exp
(
1
+
b
**
a3
)
f
=
pfunc
([
b
],
[
c
],
mode
=
mode_with_gpu
)
f
=
pfunc
([
b
],
[
c
],
mode
=
mode_with_gpu
)
v
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
)
v
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
)
v
=
v
[:,
:,
:,
::
2
]
v
=
v
[:,:,:,::
2
]
v
=
cuda_ndarray
.
CudaNdarray
(
v
)
v
=
cuda_ndarray
.
CudaNdarray
(
v
)
for
id
,
n
in
enumerate
(
f
.
maker
.
env
.
toposort
()):
for
id
,
n
in
enumerate
(
f
.
maker
.
env
.
toposort
()):
print
id
,
n
print
id
,
n
t1
=
time
.
time
()
t1
=
time
.
time
()
for
i
in
range
(
100
):
for
i
in
range
(
100
):
#let debugmode catch errors
#let debugmode catch errors
f
(
v
)
f
(
v
)
t2
=
time
.
time
()
t2
=
time
.
time
()
def
test_elemwise_collapse
():
def
test_elemwise_collapse
():
""" Test when all inputs have one(and the same) broadcastable dimension """
""" Test when all inputs have one(and the same) broadcastable dimension """
shape
=
(
4
,
5
,
60
)
shape
=
(
4
,
5
,
60
)
a
=
cuda_ndarray
.
CudaNdarray
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
))
a
=
cuda_ndarray
.
CudaNdarray
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
a
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
)
dtype
=
'float32'
))
a
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
)
a2
=
tcn
.
shared_constructor
(
a
,
'a'
)
a2
=
tcn
.
shared_constructor
(
a
,
'a'
)
a3
=
a2
.
dimshuffle
(
0
,
'x'
,
1
,
2
)
a3
=
a2
.
dimshuffle
(
0
,
'x'
,
1
,
2
)
b
=
tcn
.
CudaNdarrayType
((
False
,
True
,
False
,
False
))()
b
=
tcn
.
CudaNdarrayType
((
False
,
True
,
False
,
False
))()
c
=
a3
+
b
c
=
a3
+
b
f
=
pfunc
([
b
],
[
c
],
mode
=
mode_with_gpu
)
f
=
pfunc
([
b
],
[
c
],
mode
=
mode_with_gpu
)
v
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
shape
[
0
],
1
,
*
shape
[
1
:]),
v
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
shape
[
0
],
1
,
*
shape
[
1
:]),
dtype
=
'float32'
)
dtype
=
'float32'
)
v
=
cuda_ndarray
.
CudaNdarray
(
v
)
v
=
cuda_ndarray
.
CudaNdarray
(
v
)
if
False
:
if
False
:
for
id
,
n
in
enumerate
(
f
.
maker
.
env
.
toposort
()):
for
id
,
n
in
enumerate
(
f
.
maker
.
env
.
toposort
()):
print
id
,
n
print
id
,
n
#let debugmode catch errors
#let debugmode catch errors
out
=
f
(
v
)[
0
]
out
=
f
(
v
)[
0
]
assert
numpy
.
allclose
(
out
,
a
.
reshape
(
shape
[
0
],
1
,
*
shape
[
1
:])
+
v
)
assert
numpy
.
allclose
(
out
,
a
.
reshape
(
shape
[
0
],
1
,
*
shape
[
1
:])
+
v
)
print
"Expected collapse of all dimensions"
print
"Expected collapse of all dimensions"
def
test_elemwise_collapse2
():
def
test_elemwise_collapse2
():
""" Test when only one inputs have one broadcastable dimension """
""" Test when only one inputs have one broadcastable dimension """
shape
=
(
4
,
5
,
9
)
shape
=
(
4
,
5
,
9
)
a
=
cuda_ndarray
.
CudaNdarray
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
))
a
=
cuda_ndarray
.
CudaNdarray
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
a
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
)
dtype
=
'float32'
))
a
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
)
a2
=
tcn
.
shared_constructor
(
a
,
'a'
)
a2
=
tcn
.
shared_constructor
(
a
,
'a'
)
a3
=
a2
.
dimshuffle
(
0
,
'x'
,
1
,
2
)
a3
=
a2
.
dimshuffle
(
0
,
'x'
,
1
,
2
)
b
=
tcn
.
CudaNdarrayType
((
False
,
False
,
False
,
False
))()
b
=
tcn
.
CudaNdarrayType
((
False
,
False
,
False
,
False
))()
c
=
a3
+
b
c
=
a3
+
b
f
=
pfunc
([
b
],
[
c
],
mode
=
mode_with_gpu
)
f
=
pfunc
([
b
],
[
c
],
mode
=
mode_with_gpu
)
v
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
shape
[
0
],
5
,
*
shape
[
1
:]),
v
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
shape
[
0
],
5
,
*
shape
[
1
:]),
dtype
=
'float32'
)
dtype
=
'float32'
)
v
=
cuda_ndarray
.
CudaNdarray
(
v
)
v
=
cuda_ndarray
.
CudaNdarray
(
v
)
if
False
:
if
False
:
for
id
,
n
in
enumerate
(
f
.
maker
.
env
.
toposort
()):
for
id
,
n
in
enumerate
(
f
.
maker
.
env
.
toposort
()):
print
id
,
n
print
id
,
n
#let debugmode catch errors
#let debugmode catch errors
out
=
f
(
v
)[
0
]
out
=
f
(
v
)[
0
]
assert
numpy
.
allclose
(
out
,
a
.
reshape
(
shape
[
0
],
1
,
*
shape
[
1
:])
+
v
)
assert
numpy
.
allclose
(
out
,
a
.
reshape
(
shape
[
0
],
1
,
*
shape
[
1
:])
+
v
)
print
"Expected collapse to 3 dimensions"
print
"Expected collapse to 3 dimensions"
def
test_elemwise_collapse3
():
def
test_elemwise_collapse3
():
""" Test when only one inputs have two broadcastable dimension at each ends """
""" Test when only one inputs have two broadcastable dimension at each ends """
shape
=
(
4
,
5
)
shape
=
(
4
,
5
)
a
=
cuda_ndarray
.
CudaNdarray
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
))
a
=
cuda_ndarray
.
CudaNdarray
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
a
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
)
dtype
=
'float32'
))
a
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
)
a2
=
tcn
.
shared_constructor
(
a
,
'a'
)
a2
=
tcn
.
shared_constructor
(
a
,
'a'
)
a3
=
a2
.
dimshuffle
(
'x'
,
0
,
1
,
'x'
)
a3
=
a2
.
dimshuffle
(
'x'
,
0
,
1
,
'x'
)
b
=
tcn
.
CudaNdarrayType
((
False
,
False
,
False
,
False
))()
b
=
tcn
.
CudaNdarrayType
((
False
,
False
,
False
,
False
))()
c
=
(
a3
+
b
)
c
=
(
a3
+
b
)
f
=
pfunc
([
b
],
[
c
],
mode
=
mode_with_gpu
)
f
=
pfunc
([
b
],
[
c
],
mode
=
mode_with_gpu
)
v
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
5
,
shape
[
0
],
shape
[
1
],
4
),
v
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
5
,
shape
[
0
],
shape
[
1
],
4
),
dtype
=
'float32'
)
dtype
=
'float32'
)
v
=
cuda_ndarray
.
CudaNdarray
(
v
)
v
=
cuda_ndarray
.
CudaNdarray
(
v
)
if
False
:
if
False
:
for
id
,
n
in
enumerate
(
f
.
maker
.
env
.
toposort
()):
for
id
,
n
in
enumerate
(
f
.
maker
.
env
.
toposort
()):
print
id
,
n
print
id
,
n
#let debugmode catch errors
#let debugmode catch errors
out
=
f
(
v
)[
0
]
out
=
f
(
v
)[
0
]
assert
numpy
.
allclose
(
out
,
a
.
reshape
(
1
,
shape
[
0
],
shape
[
1
],
1
)
+
v
)
assert
numpy
.
allclose
(
out
,
a
.
reshape
(
1
,
shape
[
0
],
shape
[
1
],
1
)
+
v
)
print
"Expected collapse to 3 dimensions"
print
"Expected collapse to 3 dimensions"
def
test_elemwise_collapse4
():
def
test_elemwise_collapse4
():
""" Test when only one inputs have two broadcastable dimension at each ends and we add a scalar"""
""" Test when only one inputs have two broadcastable dimension at
each ends and we add a scalar"""
shape
=
(
4
,
5
)
shape
=
(
4
,
5
)
a
=
cuda_ndarray
.
CudaNdarray
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
))
a
=
cuda_ndarray
.
CudaNdarray
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
a
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
)
dtype
=
'float32'
))
a
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
)
a2
=
tcn
.
shared_constructor
(
a
,
'a'
)
a2
=
tcn
.
shared_constructor
(
a
,
'a'
)
a3
=
a2
.
dimshuffle
(
'x'
,
0
,
1
,
'x'
)
a3
=
a2
.
dimshuffle
(
'x'
,
0
,
1
,
'x'
)
b
=
tcn
.
CudaNdarrayType
((
False
,
False
,
False
,
False
))()
b
=
tcn
.
CudaNdarrayType
((
False
,
False
,
False
,
False
))()
c
=
(
a3
+
b
+
2
)
c
=
(
a3
+
b
+
2
)
f
=
pfunc
([
b
],
[
c
],
mode
=
mode_with_gpu
)
f
=
pfunc
([
b
],
[
c
],
mode
=
mode_with_gpu
)
v
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
5
,
shape
[
0
],
shape
[
1
],
4
),
v
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
5
,
shape
[
0
],
shape
[
1
],
4
),
dtype
=
'float32'
)
dtype
=
'float32'
)
v
=
cuda_ndarray
.
CudaNdarray
(
v
)
v
=
cuda_ndarray
.
CudaNdarray
(
v
)
if
False
:
if
False
:
for
id
,
n
in
enumerate
(
f
.
maker
.
env
.
toposort
()):
for
id
,
n
in
enumerate
(
f
.
maker
.
env
.
toposort
()):
print
id
,
n
print
id
,
n
#let debugmode catch errors
#let debugmode catch errors
out
=
f
(
v
)[
0
]
out
=
f
(
v
)[
0
]
assert
numpy
.
allclose
(
out
,
a
.
reshape
(
1
,
shape
[
0
],
shape
[
1
],
1
)
+
v
+
2
)
assert
numpy
.
allclose
(
out
,
a
.
reshape
(
1
,
shape
[
0
],
shape
[
1
],
1
)
+
v
+
2
)
print
"Expected collapse to 3 dimensions"
print
"Expected collapse to 3 dimensions"
def
test_elemwise_collapse5
():
def
test_elemwise_collapse5
():
""" Test when only one inputs have two broadcastable dimension at the beginning and we add a scalar"""
""" Test when only one inputs have two broadcastable dimension at
the beginning and we add a scalar"""
shape
=
(
4
,
5
)
shape
=
(
4
,
5
)
a
=
cuda_ndarray
.
CudaNdarray
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
))
a
=
cuda_ndarray
.
CudaNdarray
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
a
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
)
dtype
=
'float32'
))
a
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
)
a2
=
tcn
.
shared_constructor
(
a
,
'a'
)
a2
=
tcn
.
shared_constructor
(
a
,
'a'
)
a3
=
a2
.
dimshuffle
(
'x'
,
'x'
,
0
,
1
)
a3
=
a2
.
dimshuffle
(
'x'
,
'x'
,
0
,
1
)
b
=
tcn
.
CudaNdarrayType
((
False
,
False
,
False
,
False
))()
b
=
tcn
.
CudaNdarrayType
((
False
,
False
,
False
,
False
))()
c
=
(
a3
+
b
+
2
)
c
=
(
a3
+
b
+
2
)
f
=
pfunc
([
b
],
[
c
],
mode
=
mode_with_gpu
)
f
=
pfunc
([
b
],
[
c
],
mode
=
mode_with_gpu
)
v
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
5
,
4
,
shape
[
0
],
shape
[
1
]),
v
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
5
,
4
,
shape
[
0
],
shape
[
1
]),
dtype
=
'float32'
)
dtype
=
'float32'
)
v
=
cuda_ndarray
.
CudaNdarray
(
v
)
v
=
cuda_ndarray
.
CudaNdarray
(
v
)
if
False
:
if
False
:
for
id
,
n
in
enumerate
(
f
.
maker
.
env
.
toposort
()):
for
id
,
n
in
enumerate
(
f
.
maker
.
env
.
toposort
()):
print
id
,
n
print
id
,
n
#let debugmode catch errors
#let debugmode catch errors
out
=
f
(
v
)[
0
]
out
=
f
(
v
)[
0
]
assert
numpy
.
allclose
(
out
,
a
.
reshape
(
1
,
1
,
shape
[
0
],
shape
[
1
])
+
v
+
2
)
assert
numpy
.
allclose
(
out
,
a
.
reshape
(
1
,
1
,
shape
[
0
],
shape
[
1
])
+
v
+
2
)
print
"Expected collapse to 2 dimensions"
print
"Expected collapse to 2 dimensions"
def
test_elemwise_collapse6
():
def
test_elemwise_collapse6
():
""" Test when all inputs have two broadcastable dimension at the beginning"""
""" Test when all inputs have two broadcastable dimension at the
beginning"""
shape
=
(
4
,
5
)
shape
=
(
4
,
5
)
a
=
cuda_ndarray
.
CudaNdarray
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
))
a
=
cuda_ndarray
.
CudaNdarray
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
a
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
)
dtype
=
'float32'
))
a
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
)
a2
=
tcn
.
shared_constructor
(
a
,
'a'
)
a2
=
tcn
.
shared_constructor
(
a
,
'a'
)
a3
=
a2
.
dimshuffle
(
'x'
,
'x'
,
0
,
1
)
a3
=
a2
.
dimshuffle
(
'x'
,
'x'
,
0
,
1
)
b
=
tcn
.
CudaNdarrayType
((
True
,
True
,
False
,
False
))()
b
=
tcn
.
CudaNdarrayType
((
True
,
True
,
False
,
False
))()
f
=
pfunc
([
b
],
[
a3
+
b
],
mode
=
mode_with_gpu
)
f
=
pfunc
([
b
],
[
a3
+
b
],
mode
=
mode_with_gpu
)
v
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
1
,
1
,
shape
[
0
],
shape
[
1
]),
dtype
=
'float32'
)
v
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
1
,
1
,
shape
[
0
],
shape
[
1
]),
v
=
cuda_ndarray
.
CudaNdarray
(
v
)
dtype
=
'float32'
)
v
=
cuda_ndarray
.
CudaNdarray
(
v
)
if
False
:
if
False
:
for
id
,
n
in
enumerate
(
f
.
maker
.
env
.
toposort
()):
for
id
,
n
in
enumerate
(
f
.
maker
.
env
.
toposort
()):
print
id
,
n
print
id
,
n
#let debugmode catch errors
#let debugmode catch errors
out
=
f
(
v
)[
0
]
out
=
f
(
v
)[
0
]
assert
numpy
.
allclose
(
out
,
a
.
reshape
(
1
,
1
,
shape
[
0
],
shape
[
1
])
+
v
)
assert
numpy
.
allclose
(
out
,
a
.
reshape
(
1
,
1
,
shape
[
0
],
shape
[
1
])
+
v
)
print
"Expected collapse to c contiguous"
print
"Expected collapse to c contiguous"
def
test_elemwise_collapse7
(
atol
=
1e-6
):
def
test_elemwise_collapse7
(
atol
=
1e-6
):
""" Test when one input have one broadcastable dimension and the other is a scalar"""
""" Test when one input have one broadcastable dimension and the
other is a scalar"""
shape
=
(
5
,
4
,
1
)
shape
=
(
5
,
4
,
1
)
a
=
cuda_ndarray
.
CudaNdarray
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
))
a
=
cuda_ndarray
.
CudaNdarray
(
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
a
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
)
dtype
=
'float32'
))
a
=
theano
.
_asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
'float32'
)
a2
=
tcn
.
shared_constructor
(
a
.
copy
(),
'a'
)
a2
=
tcn
.
shared_constructor
(
a
.
copy
(),
'a'
)
a3
=
a2
.
dimshuffle
(
0
,
'x'
,
1
,
2
)
a3
=
a2
.
dimshuffle
(
0
,
'x'
,
1
,
2
)
f
=
pfunc
([],
[
a3
+
2
],
mode
=
mode_with_gpu
)
f
=
pfunc
([],
[
a3
+
2
],
mode
=
mode_with_gpu
)
if
False
:
if
False
:
for
id
,
n
in
enumerate
(
f
.
maker
.
env
.
toposort
()):
for
id
,
n
in
enumerate
(
f
.
maker
.
env
.
toposort
()):
print
id
,
n
print
id
,
n
#let debugmode catch errors
#let debugmode catch errors
out
=
f
()[
0
]
out
=
f
()[
0
]
ans
=
(
a
+
2
)
.
reshape
(
shape
[
0
],
1
,
shape
[
1
],
shape
[
2
])
ans
=
(
a
+
2
)
.
reshape
(
shape
[
0
],
1
,
shape
[
1
],
shape
[
2
])
assert
numpy
.
allclose
(
out
,
ans
,
atol
=
atol
)
assert
numpy
.
allclose
(
out
,
ans
,
atol
=
atol
)
print
"Expected collapse to c contiguous"
print
"Expected collapse to c contiguous"
...
@@ -651,40 +723,45 @@ def test_hostfromgpu_shape_i():
...
@@ -651,40 +723,45 @@ def test_hostfromgpu_shape_i():
"""
"""
pass
pass
m
=
mode_with_gpu
.
including
(
'local_dot_to_dot22'
,
'local_dot22_to_dot22scalar'
,
'specialize'
)
m
=
mode_with_gpu
.
including
(
'local_dot_to_dot22'
,
a
=
T
.
fmatrix
(
'a'
)
'local_dot22_to_dot22scalar'
,
'specialize'
)
ca
=
theano
.
sandbox
.
cuda
.
var
.
CudaNdarrayType
((
False
,
False
))()
a
=
T
.
fmatrix
(
'a'
)
ca
=
theano
.
sandbox
.
cuda
.
var
.
CudaNdarrayType
((
False
,
False
))()
av
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
5
,
4
),
dtype
=
'float32'
)
av
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
5
,
4
),
dtype
=
'float32'
)
cv
=
cuda
.
CudaNdarray
(
numpy
.
asarray
(
numpy
.
random
.
rand
(
5
,
4
),
dtype
=
'float32'
))
cv
=
cuda
.
CudaNdarray
(
numpy
.
asarray
(
numpy
.
random
.
rand
(
5
,
4
),
dtype
=
'float32'
))
f
=
theano
.
function
([
a
],
cuda
.
basic_ops
.
gpu_from_host
(
a
),
mode
=
m
)
f
=
theano
.
function
([
a
],
cuda
.
basic_ops
.
gpu_from_host
(
a
),
mode
=
m
)
assert
cuda
.
basic_ops
.
gpu_from_host
in
[
x
.
op
for
x
in
f
.
maker
.
env
.
toposort
()]
assert
cuda
.
basic_ops
.
gpu_from_host
in
[
x
.
op
f
=
theano
.
function
([
a
],
cuda
.
basic_ops
.
gpu_from_host
(
a
)
.
shape
,
mode
=
m
)
for
x
in
f
.
maker
.
env
.
toposort
()]
f
=
theano
.
function
([
a
],
cuda
.
basic_ops
.
gpu_from_host
(
a
)
.
shape
,
mode
=
m
)
topo
=
f
.
maker
.
env
.
toposort
()
topo
=
f
.
maker
.
env
.
toposort
()
assert
isinstance
(
topo
[
0
]
.
op
,
T
.
opt
.
Shape_i
)
assert
isinstance
(
topo
[
0
]
.
op
,
T
.
opt
.
Shape_i
)
assert
isinstance
(
topo
[
1
]
.
op
,
T
.
opt
.
Shape_i
)
assert
isinstance
(
topo
[
1
]
.
op
,
T
.
opt
.
Shape_i
)
assert
isinstance
(
topo
[
2
]
.
op
,
T
.
opt
.
MakeVector
)
assert
isinstance
(
topo
[
2
]
.
op
,
T
.
opt
.
MakeVector
)
assert
tuple
(
f
(
av
))
==
(
5
,
4
)
assert
tuple
(
f
(
av
))
==
(
5
,
4
)
f
=
theano
.
function
([
ca
],
cuda
.
basic_ops
.
host_from_gpu
(
ca
),
mode
=
m
)
f
=
theano
.
function
([
ca
],
cuda
.
basic_ops
.
host_from_gpu
(
ca
),
mode
=
m
)
assert
cuda
.
basic_ops
.
host_from_gpu
in
[
x
.
op
for
x
in
f
.
maker
.
env
.
toposort
()]
assert
cuda
.
basic_ops
.
host_from_gpu
in
[
x
.
op
f
=
theano
.
function
([
ca
],
cuda
.
basic_ops
.
host_from_gpu
(
ca
)
.
shape
,
mode
=
m
)
for
x
in
f
.
maker
.
env
.
toposort
()]
f
=
theano
.
function
([
ca
],
cuda
.
basic_ops
.
host_from_gpu
(
ca
)
.
shape
,
mode
=
m
)
topo
=
f
.
maker
.
env
.
toposort
()
topo
=
f
.
maker
.
env
.
toposort
()
assert
isinstance
(
topo
[
0
]
.
op
,
T
.
opt
.
Shape_i
)
assert
isinstance
(
topo
[
0
]
.
op
,
T
.
opt
.
Shape_i
)
assert
isinstance
(
topo
[
1
]
.
op
,
T
.
opt
.
Shape_i
)
assert
isinstance
(
topo
[
1
]
.
op
,
T
.
opt
.
Shape_i
)
assert
isinstance
(
topo
[
2
]
.
op
,
T
.
opt
.
MakeVector
)
assert
isinstance
(
topo
[
2
]
.
op
,
T
.
opt
.
MakeVector
)
assert
tuple
(
f
(
cv
))
==
(
5
,
4
)
assert
tuple
(
f
(
cv
))
==
(
5
,
4
)
# -----------------------------------------------------------------------
# -----------------------------------------------------------------------
import
theano.sandbox.cuda
as
cuda_ndarray
import
theano.sandbox.cuda
as
cuda_ndarray
def
test_gpujoin_assert_cndas
():
def
test_gpujoin_assert_cndas
():
# this will end up being an ndarray, as it's float64
# this will end up being an ndarray, as it's float64
_a
=
numpy
.
asarray
([[
1
,
2
],[
3
,
4
]],
dtype
=
'float64'
)
_a
=
numpy
.
asarray
([[
1
,
2
],
[
3
,
4
]],
dtype
=
'float64'
)
a
=
theano
.
shared
(
_a
)
a
=
theano
.
shared
(
_a
)
try
:
try
:
...
@@ -697,64 +774,80 @@ def test_gpujoin_assert_cndas():
...
@@ -697,64 +774,80 @@ def test_gpujoin_assert_cndas():
assert
False
assert
False
def
test_gpujoin_no_rebroadcast
():
def
test_gpujoin_no_rebroadcast
():
_a
=
numpy
.
asarray
([[
1
,
2
],[
3
,
4
]],
dtype
=
'float32'
)
_a
=
numpy
.
asarray
([[
1
,
2
],
[
3
,
4
]],
dtype
=
'float32'
)
a
=
tcn
.
shared_constructor
(
_a
)
a
=
tcn
.
shared_constructor
(
_a
)
f
=
theano
.
function
([],
T
.
join
(
1
,
a
))
f
=
theano
.
function
([],
T
.
join
(
1
,
a
))
l
=
f
.
maker
.
env
.
toposort
()
l
=
f
.
maker
.
env
.
toposort
()
assert
not
any
([
isinstance
(
x
.
op
,
T
.
Rebroadcast
)
for
x
in
l
])
assert
not
any
([
isinstance
(
x
.
op
,
T
.
Rebroadcast
)
for
x
in
l
])
def
test_gpualloc_input_on_gpu
():
def
test_gpualloc_input_on_gpu
():
a_val
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
4
,
5
),
dtype
=
'float32'
)
a_val
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
4
,
5
),
dtype
=
'float32'
)
a
=
tcn
.
shared_constructor
(
a_val
)
a
=
tcn
.
shared_constructor
(
a_val
)
b
=
T
.
fscalar
()
b
=
T
.
fscalar
()
f
=
theano
.
function
([
b
],
T
.
ones_like
(
a
)
+
b
,
mode
=
mode_without_gpu
)
f
=
theano
.
function
([
b
],
T
.
ones_like
(
a
)
+
b
,
mode
=
mode_without_gpu
)
f_gpu
=
theano
.
function
([
b
],
T
.
ones_like
(
a
)
+
b
,
mode
=
mode_with_gpu
)
f_gpu
=
theano
.
function
([
b
],
T
.
ones_like
(
a
)
+
b
,
mode
=
mode_with_gpu
)
assert
sum
([
node
.
op
==
T
.
alloc
for
node
in
f
.
maker
.
env
.
toposort
()])
==
1
assert
sum
([
node
.
op
==
B
.
gpu_alloc
for
node
in
f_gpu
.
maker
.
env
.
toposort
()])
==
1
assert
sum
([
node
.
op
==
T
.
alloc
for
node
in
f
.
maker
.
env
.
toposort
()])
==
1
assert
numpy
.
allclose
(
numpy
.
ones
(
a
.
get_value
(
borrow
=
True
)
.
shape
)
+
9
,
assert
sum
([
node
.
op
==
B
.
gpu_alloc
for
node
in
f_gpu
.
maker
.
env
.
toposort
()])
==
1
f_gpu
(
9
))
assert
numpy
.
allclose
(
f
(
5
),
f_gpu
(
5
))
assert
numpy
.
allclose
(
numpy
.
ones
(
a
.
get_value
(
borrow
=
True
)
.
shape
)
+
9
,
f_gpu
(
9
))
assert
numpy
.
allclose
(
f
(
5
),
f_gpu
(
5
))
def
test_gpujoin_gpualloc
():
def
test_gpujoin_gpualloc
():
a
=
T
.
fmatrix
(
'a'
)
a
=
T
.
fmatrix
(
'a'
)
a_val
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
4
,
5
),
dtype
=
'float32'
)
a_val
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
4
,
5
),
dtype
=
'float32'
)
b
=
T
.
fmatrix
(
'b'
)
b
=
T
.
fmatrix
(
'b'
)
b_val
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
3
,
5
),
dtype
=
'float32'
)
b_val
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
3
,
5
),
dtype
=
'float32'
)
f
=
theano
.
function
([
a
,
b
],
T
.
join
(
0
,
T
.
zeros_like
(
a
),
T
.
ones_like
(
b
))
+
4
,
mode
=
mode_without_gpu
)
f
=
theano
.
function
([
a
,
b
],
T
.
join
(
0
,
T
.
zeros_like
(
a
),
T
.
ones_like
(
b
))
+
4
,
f_gpu
=
theano
.
function
([
a
,
b
],
T
.
join
(
0
,
T
.
zeros_like
(
a
),
T
.
ones_like
(
b
)),
mode
=
mode_with_gpu
)
mode
=
mode_without_gpu
)
f_gpu2
=
theano
.
function
([
a
,
b
],
T
.
join
(
0
,
T
.
zeros_like
(
a
),
T
.
ones_like
(
b
))
+
4
,
mode
=
mode_with_gpu
)
f_gpu
=
theano
.
function
([
a
,
b
],
T
.
join
(
0
,
T
.
zeros_like
(
a
),
T
.
ones_like
(
b
)),
mode
=
mode_with_gpu
)
f_gpu2
=
theano
.
function
([
a
,
b
],
T
.
join
(
0
,
T
.
zeros_like
(
a
),
T
.
ones_like
(
b
))
+
4
,
mode
=
mode_with_gpu
)
assert
sum
([
node
.
op
==
T
.
alloc
for
node
in
f
.
maker
.
env
.
toposort
()])
==
2
assert
sum
([
node
.
op
==
T
.
join
for
node
in
f
.
maker
.
env
.
toposort
()])
==
1
assert
sum
([
node
.
op
==
B
.
gpu_alloc
for
node
in
f_gpu
.
maker
.
env
.
toposort
()])
==
2
assert
sum
([
node
.
op
==
B
.
gpu_join
for
node
in
f_gpu
.
maker
.
env
.
toposort
()])
==
1
assert
sum
([
node
.
op
==
B
.
gpu_alloc
for
node
in
f_gpu2
.
maker
.
env
.
toposort
()])
==
2
assert
sum
([
node
.
op
==
B
.
gpu_join
for
node
in
f_gpu2
.
maker
.
env
.
toposort
()])
==
1
assert
numpy
.
allclose
(
f
(
a_val
,
b_val
),
f_gpu2
(
a_val
,
b_val
))
assert
sum
([
node
.
op
==
T
.
alloc
for
node
in
f
.
maker
.
env
.
toposort
()])
==
2
assert
sum
([
node
.
op
==
T
.
join
for
node
in
f
.
maker
.
env
.
toposort
()])
==
1
assert
sum
([
node
.
op
==
B
.
gpu_alloc
for
node
in
f_gpu
.
maker
.
env
.
toposort
()])
==
2
assert
sum
([
node
.
op
==
B
.
gpu_join
for
node
in
f_gpu
.
maker
.
env
.
toposort
()])
==
1
assert
sum
([
node
.
op
==
B
.
gpu_alloc
for
node
in
f_gpu2
.
maker
.
env
.
toposort
()])
==
2
assert
sum
([
node
.
op
==
B
.
gpu_join
for
node
in
f_gpu2
.
maker
.
env
.
toposort
()])
==
1
assert
numpy
.
allclose
(
f
(
a_val
,
b_val
),
f_gpu2
(
a_val
,
b_val
))
def
test_gpualloc_output_to_gpu
():
def
test_gpualloc_output_to_gpu
():
a_val
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
4
,
5
),
dtype
=
'float32'
)
a_val
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
4
,
5
),
dtype
=
'float32'
)
a
=
tcn
.
shared_constructor
(
a_val
)
a
=
tcn
.
shared_constructor
(
a_val
)
b
=
T
.
fscalar
()
b
=
T
.
fscalar
()
f
=
theano
.
function
([
b
],
T
.
ones_like
(
a
)
+
b
,
mode
=
mode_without_gpu
)
f
=
theano
.
function
([
b
],
T
.
ones_like
(
a
)
+
b
,
mode
=
mode_without_gpu
)
f_gpu
=
theano
.
function
([
b
],
B
.
gpu_from_host
(
T
.
ones_like
(
a
))
+
b
,
mode
=
mode_with_gpu
)
f_gpu
=
theano
.
function
([
b
],
B
.
gpu_from_host
(
T
.
ones_like
(
a
))
+
b
,
mode
=
mode_with_gpu
)
print
f
.
maker
.
env
.
toposort
()
print
f
.
maker
.
env
.
toposort
()
print
f_gpu
.
maker
.
env
.
toposort
()
print
f_gpu
.
maker
.
env
.
toposort
()
print
f
(
2
)
print
f
(
2
)
print
f_gpu
(
2
)
print
f_gpu
(
2
)
assert
sum
([
node
.
op
==
T
.
alloc
for
node
in
f
.
maker
.
env
.
toposort
()])
==
1
assert
sum
([
node
.
op
==
T
.
alloc
for
node
in
f
.
maker
.
env
.
toposort
()])
==
1
assert
sum
([
node
.
op
==
B
.
gpu_alloc
for
node
in
f_gpu
.
maker
.
env
.
toposort
()])
==
1
assert
sum
([
node
.
op
==
B
.
gpu_alloc
for
node
in
f_gpu
.
maker
.
env
.
toposort
()])
==
1
assert
numpy
.
allclose
(
numpy
.
ones
(
a
.
get_value
(
borrow
=
True
)
.
shape
)
+
9
,
f_gpu
(
9
))
assert
numpy
.
allclose
(
numpy
.
ones
(
a
.
get_value
(
borrow
=
True
)
.
shape
)
+
9
,
assert
numpy
.
allclose
(
f
(
5
),
f_gpu
(
5
))
f_gpu
(
9
))
assert
numpy
.
allclose
(
f
(
5
),
f_gpu
(
5
))
import
theano.tensor.tests.test_basic
import
theano.tensor.tests.test_basic
...
@@ -766,6 +859,7 @@ class TestAlloc(theano.tensor.tests.test_basic.TestAlloc):
...
@@ -766,6 +859,7 @@ class TestAlloc(theano.tensor.tests.test_basic.TestAlloc):
shared
=
staticmethod
(
cuda
.
shared_constructor
)
shared
=
staticmethod
(
cuda
.
shared_constructor
)
allocs
=
[
B
.
GpuAlloc
,
B
.
GpuAlloc
,
tensor
.
Alloc
]
allocs
=
[
B
.
GpuAlloc
,
B
.
GpuAlloc
,
tensor
.
Alloc
]
class
T_Join_and_Split
(
theano
.
tensor
.
tests
.
test_basic
.
T_Join_and_Split
):
class
T_Join_and_Split
(
theano
.
tensor
.
tests
.
test_basic
.
T_Join_and_Split
):
def
setUp
(
self
):
def
setUp
(
self
):
utt
.
seed_rng
()
utt
.
seed_rng
()
...
@@ -783,128 +877,152 @@ class T_Join_and_Split(theano.tensor.tests.test_basic.T_Join_and_Split):
...
@@ -783,128 +877,152 @@ class T_Join_and_Split(theano.tensor.tests.test_basic.T_Join_and_Split):
# This is to don't duplicate test.
# This is to don't duplicate test.
class
T_subtensor
(
theano
.
tensor
.
tests
.
test_basic
.
T_subtensor
):
class
T_subtensor
(
theano
.
tensor
.
tests
.
test_basic
.
T_subtensor
):
shared
=
staticmethod
(
cuda
.
shared_constructor
)
shared
=
staticmethod
(
cuda
.
shared_constructor
)
sub
=
cuda
.
GpuSubtensor
sub
=
cuda
.
GpuSubtensor
inc_sub
=
cuda
.
GpuIncSubtensor
inc_sub
=
cuda
.
GpuIncSubtensor
adv_sub1
=
cuda
.
GpuAdvancedSubtensor1
adv_sub1
=
cuda
.
GpuAdvancedSubtensor1
adv_incsub1
=
cuda
.
GpuAdvancedIncSubtensor1
adv_incsub1
=
cuda
.
GpuAdvancedIncSubtensor1
mode
=
mode_with_gpu
mode
=
mode_with_gpu
dtype
=
'float32'
dtype
=
'float32'
ignore_topo
=
(
B
.
HostFromGpu
,
B
.
GpuFromHost
)
ignore_topo
=
(
B
.
HostFromGpu
,
B
.
GpuFromHost
)
fast_compile
=
theano
.
config
.
mode
==
'FAST_COMPILE'
fast_compile
=
theano
.
config
.
mode
==
'FAST_COMPILE'
def
__init__
(
self
,
name
):
def
__init__
(
self
,
name
):
return
super
(
theano
.
tensor
.
tests
.
test_basic
.
T_subtensor
,
self
)
.
__init__
(
name
)
return
super
(
theano
.
tensor
.
tests
.
test_basic
.
T_subtensor
,
self
)
.
__init__
(
name
)
def
test_advinc_subtensor1
():
def
test_advinc_subtensor1
():
""" Test the second case in the opt local_gpu_advanced_incsubtensor1 """
""" Test the second case in the opt local_gpu_advanced_incsubtensor1 """
shared
=
cuda
.
shared_constructor
shared
=
cuda
.
shared_constructor
#shared = tensor.shared
#shared = tensor.shared
xval
=
numpy
.
asarray
([[
1
,
2
,
3
],
[
4
,
5
,
6
],
[
7
,
8
,
9
]],
xval
=
numpy
.
asarray
([[
1
,
2
,
3
],
[
4
,
5
,
6
],
[
7
,
8
,
9
]],
dtype
=
'float32'
)
dtype
=
'float32'
)
yval
=
numpy
.
asarray
([[
10
,
10
,
10
],
[
10
,
10
,
10
]],
yval
=
numpy
.
asarray
([[
10
,
10
,
10
],
[
10
,
10
,
10
]],
dtype
=
'float32'
)
dtype
=
'float32'
)
x
=
shared
(
xval
,
name
=
'x'
)
x
=
shared
(
xval
,
name
=
'x'
)
y
=
T
.
fmatrices
(
'y'
)
y
=
T
.
fmatrices
(
'y'
)
expr
=
T
.
advanced_inc_subtensor1
(
x
,
y
,[
0
,
2
])
expr
=
T
.
advanced_inc_subtensor1
(
x
,
y
,
[
0
,
2
])
f
=
theano
.
function
([
y
],
expr
,
mode
=
mode_with_gpu
)
f
=
theano
.
function
([
y
],
expr
,
mode
=
mode_with_gpu
)
assert
sum
([
isinstance
(
node
.
op
,
cuda
.
GpuAdvancedIncSubtensor1
)
for
node
in
f
.
maker
.
env
.
toposort
()
])
==
1
assert
sum
([
isinstance
(
node
.
op
,
cuda
.
GpuAdvancedIncSubtensor1
)
assert
numpy
.
allclose
(
f
(
yval
),[[
11.
,
12.
,
13.
],
[
4.
,
5.
,
6.
],
[
17.
,
18.
,
19.
]])
for
node
in
f
.
maker
.
env
.
toposort
()])
==
1
assert
numpy
.
allclose
(
f
(
yval
),
[[
11.
,
12.
,
13.
],
[
4.
,
5.
,
6.
],
[
17.
,
18.
,
19.
]])
def
test_inc_subtensor
():
def
test_inc_subtensor
():
shared
=
cuda
.
shared_constructor
shared
=
cuda
.
shared_constructor
#shared = tensor.shared
#shared = tensor.shared
x
,
y
=
T
.
fmatrices
(
'x'
,
'y'
)
x
,
y
=
T
.
fmatrices
(
'x'
,
'y'
)
xval
=
numpy
.
asarray
([[
1
,
2
,
3
],
[
4
,
5
,
6
],
[
7
,
8
,
9
]],
xval
=
numpy
.
asarray
([[
1
,
2
,
3
],
[
4
,
5
,
6
],
[
7
,
8
,
9
]],
dtype
=
'float32'
)
dtype
=
'float32'
)
yval
=
numpy
.
asarray
([[
10
,
10
,
10
],
[
10
,
10
,
10
],
[
10
,
10
,
10
]],
yval
=
numpy
.
asarray
([[
10
,
10
,
10
],
[
10
,
10
,
10
],
[
10
,
10
,
10
]],
dtype
=
'float32'
)
dtype
=
'float32'
)
expr
=
T
.
inc_subtensor
(
x
[:,
1
:
3
],
y
[:,
1
:
3
])
expr
=
T
.
inc_subtensor
(
x
[:,
1
:
3
],
y
[:,
1
:
3
])
f
=
theano
.
function
([
x
,
y
],
expr
,
mode
=
mode_with_gpu
)
f
=
theano
.
function
([
x
,
y
],
expr
,
mode
=
mode_with_gpu
)
print
f
.
maker
.
env
.
toposort
()
print
f
.
maker
.
env
.
toposort
()
assert
sum
([
isinstance
(
node
.
op
,
cuda
.
GpuSubtensor
)
for
node
in
f
.
maker
.
env
.
toposort
()
])
==
1
assert
sum
([
isinstance
(
node
.
op
,
cuda
.
GpuSubtensor
)
assert
sum
([
isinstance
(
node
.
op
,
cuda
.
GpuIncSubtensor
)
and
node
.
op
.
set_instead_of_inc
==
False
for
node
in
f
.
maker
.
env
.
toposort
()
])
==
1
for
node
in
f
.
maker
.
env
.
toposort
()])
==
1
assert
numpy
.
allclose
(
f
(
xval
,
yval
),[[
1.
,
12.
,
13.
],
[
4.
,
15.
,
16.
],
[
7.
,
18.
,
19.
]])
assert
sum
([
isinstance
(
node
.
op
,
cuda
.
GpuIncSubtensor
)
and
node
.
op
.
set_instead_of_inc
==
False
for
node
in
f
.
maker
.
env
.
toposort
()])
==
1
assert
numpy
.
allclose
(
f
(
xval
,
yval
),
[[
1.
,
12.
,
13.
],
[
4.
,
15.
,
16.
],
[
7.
,
18.
,
19.
]])
def
test_set_subtensor
():
def
test_set_subtensor
():
shared
=
cuda
.
shared_constructor
shared
=
cuda
.
shared_constructor
#shared = tensor.shared
#shared = tensor.shared
x
,
y
=
T
.
fmatrices
(
'x'
,
'y'
)
x
,
y
=
T
.
fmatrices
(
'x'
,
'y'
)
xval
=
numpy
.
asarray
([[
1
,
2
,
3
],
[
4
,
5
,
6
],
[
7
,
8
,
9
]],
xval
=
numpy
.
asarray
([[
1
,
2
,
3
],
[
4
,
5
,
6
],
[
7
,
8
,
9
]],
dtype
=
'float32'
)
dtype
=
'float32'
)
yval
=
numpy
.
asarray
([[
10
,
10
,
10
],
[
10
,
10
,
10
],
[
10
,
10
,
10
]],
yval
=
numpy
.
asarray
([[
10
,
10
,
10
],
[
10
,
10
,
10
],
[
10
,
10
,
10
]],
dtype
=
'float32'
)
dtype
=
'float32'
)
expr
=
T
.
set_subtensor
(
x
[:,
1
:
3
],
y
[:,
1
:
3
])
expr
=
T
.
set_subtensor
(
x
[:,
1
:
3
],
y
[:,
1
:
3
])
f
=
theano
.
function
([
x
,
y
],
expr
,
mode
=
mode_with_gpu
)
f
=
theano
.
function
([
x
,
y
],
expr
,
mode
=
mode_with_gpu
)
assert
sum
([
isinstance
(
node
.
op
,
cuda
.
GpuSubtensor
)
for
node
in
f
.
maker
.
env
.
toposort
()
])
==
1
assert
sum
([
isinstance
(
node
.
op
,
cuda
.
GpuSubtensor
)
assert
sum
([
isinstance
(
node
.
op
,
cuda
.
GpuIncSubtensor
)
and
node
.
op
.
set_instead_of_inc
==
True
for
node
in
f
.
maker
.
env
.
toposort
()
])
==
1
for
node
in
f
.
maker
.
env
.
toposort
()])
==
1
print
f
(
xval
,
yval
)
assert
sum
([
isinstance
(
node
.
op
,
cuda
.
GpuIncSubtensor
)
and
node
.
op
.
set_instead_of_inc
==
True
for
node
in
f
.
maker
.
env
.
toposort
()])
==
1
print
f
(
xval
,
yval
)
def
test_many_arg_elemwise
():
def
test_many_arg_elemwise
():
"""this test checks whether the + and * elemwise ops can handle extremely large numbers of
"""this test checks whether the + and * elemwise ops can handle extremely large numbers of
arguments on gpu
arguments on gpu
i.e., it is a test of the optimization theano/sandbox/cuda/opt.py:local_gpu_huge_add_or_mul """
i.e., it is a test of the optimization theano/sandbox/cuda/opt.py:local_gpu_huge_add_or_mul """
rng
=
numpy
.
random
.
RandomState
(
[
1
,
2
,
3
])
rng
=
numpy
.
random
.
RandomState
(
[
1
,
2
,
3
])
for
num_args
in
[
25
]:
for
num_args
in
[
25
]:
for
op_to_test
in
[
theano
.
tensor
.
add
,
theano
.
tensor
.
mul
]:
for
op_to_test
in
[
theano
.
tensor
.
add
,
theano
.
tensor
.
mul
]:
for
nb_dim
in
[
2
,
3
,
4
,
5
]:
for
nb_dim
in
[
2
,
3
,
4
,
5
]:
shapes
=
[
rng
.
randint
(
1
,
5
)
for
i
in
range
(
nb_dim
)]
shapes
=
[
rng
.
randint
(
1
,
5
)
for
i
in
range
(
nb_dim
)]
args
=
[
numpy
.
cast
[
'float32'
](
rng
.
randn
(
*
shapes
))
for
arg
in
xrange
(
0
,
num_args
)
]
args
=
[
numpy
.
cast
[
'float32'
](
rng
.
randn
(
*
shapes
))
for
arg
in
xrange
(
0
,
num_args
)]
symb_args
=
[
theano
.
tensor
.
TensorType
(
'float32'
,
(
False
,)
*
nb_dim
)()
for
arg
in
xrange
(
0
,
num_args
)
]
symb_args
=
[
theano
.
tensor
.
TensorType
(
'float32'
,
(
False
,)
*
nb_dim
)()
for
arg
in
xrange
(
0
,
num_args
)]
outputs
=
[]
outputs
=
[]
for
mode
in
[
mode_with_gpu
,
mode_without_gpu
]:
for
mode
in
[
mode_with_gpu
,
mode_without_gpu
]:
#test the optimization local_gpu_elemwise_0
#test the optimization local_gpu_elemwise_0
f
=
theano
.
function
(
symb_args
,
op_to_test
(
*
symb_args
),
mode
=
mode
.
excluding
(
"local_gpu_elemwise_1"
)
)
f
=
theano
.
function
(
outputs
.
append
(
f
(
*
args
)
)
symb_args
,
op_to_test
(
*
symb_args
),
mode
=
mode
.
excluding
(
"local_gpu_elemwise_1"
))
outputs
.
append
(
f
(
*
args
))
#assert that the test was done on the gpu.
#assert that the test was done on the gpu.
if
mode
is
mode_with_gpu
:
if
mode
is
mode_with_gpu
:
assert
any
([
isinstance
(
node
.
op
,
cuda
.
GpuElemwise
)
for
node
in
f
.
maker
.
env
.
nodes
])
assert
any
([
isinstance
(
node
.
op
,
cuda
.
GpuElemwise
)
for
node
in
f
.
maker
.
env
.
nodes
])
#test the optimization local_gpu_elemwise_1
#test the optimization local_gpu_elemwise_1
f
=
theano
.
function
(
symb_args
,
f
=
theano
.
function
(
cuda
.
gpu_from_host
(
op_to_test
(
*
symb_args
)),
symb_args
,
mode
=
mode
.
excluding
(
"local_gpu_elemwise_0"
)
)
cuda
.
gpu_from_host
(
op_to_test
(
*
symb_args
)),
out
=
f
(
*
args
)
mode
=
mode
.
excluding
(
"local_gpu_elemwise_0"
))
out
=
f
(
*
args
)
#assert that the test was done on the gpu.
#assert that the test was done on the gpu.
if
mode
is
mode_with_gpu
:
if
mode
is
mode_with_gpu
:
assert
any
([
isinstance
(
node
.
op
,
cuda
.
GpuElemwise
)
for
node
in
f
.
maker
.
env
.
nodes
])
assert
any
([
isinstance
(
node
.
op
,
cuda
.
GpuElemwise
)
for
node
in
f
.
maker
.
env
.
nodes
])
assert
numpy
.
allclose
(
out
,
outputs
[
-
1
])
assert
numpy
.
allclose
(
out
,
outputs
[
-
1
])
results_gpu
,
results_cpu
=
outputs
results_gpu
,
results_cpu
=
outputs
assert
numpy
.
allclose
(
results_gpu
,
results_cpu
)
assert
numpy
.
allclose
(
results_gpu
,
results_cpu
)
def
test_duplicate_arg_elemwise
():
def
test_duplicate_arg_elemwise
():
A
=
theano
.
tensor
.
fmatrix
()
A
=
theano
.
tensor
.
fmatrix
()
B
=
A
+
A
B
=
A
+
A
f
=
theano
.
function
([
A
],
B
,
mode
=
mode_with_gpu
)
f
=
theano
.
function
([
A
],
B
,
mode
=
mode_with_gpu
)
Aval
=
numpy
.
random
.
RandomState
([
1
,
2
,
3
])
.
randn
(
5
,
5
)
.
astype
(
'float32'
)
Aval
=
numpy
.
random
.
RandomState
([
1
,
2
,
3
])
.
randn
(
5
,
5
)
.
astype
(
'float32'
)
Bval
=
Aval
+
Aval
Bval
=
Aval
+
Aval
assert
numpy
.
allclose
(
Bval
,
f
(
Aval
))
assert
numpy
.
allclose
(
Bval
,
f
(
Aval
))
def
test_shared_float32
():
def
test_shared_float32
():
'''Test use of cuda.shared_constructor through theano.shared'''
'''Test use of cuda.shared_constructor through theano.shared'''
# Register cuda.shared_constructor in theano.shared
# Register cuda.shared_constructor in theano.shared
theano
.
shared
.
constructors
.
append
(
cuda
.
shared_constructor
)
theano
.
shared
.
constructors
.
append
(
cuda
.
shared_constructor
)
a
=
theano
.
shared
(
numpy
.
ones
((
2
,
3
),
dtype
=
'float32'
))
a
=
theano
.
shared
(
numpy
.
ones
((
2
,
3
),
dtype
=
'float32'
))
assert
isinstance
(
a
.
type
,
tcn
.
CudaNdarrayType
)
assert
isinstance
(
a
.
type
,
tcn
.
CudaNdarrayType
)
# Unregister
# Unregister
del
theano
.
shared
.
constructors
[
-
1
]
del
theano
.
shared
.
constructors
[
-
1
]
def
test_shared_cudandarray
():
def
test_shared_cudandarray
():
'''Test that we can create a CudaNdarraySharedVariable from a CudaNdarray'''
'''Test that we can create a CudaNdarraySharedVariable from a
a
=
cuda
.
shared_constructor
(
cuda
.
CudaNdarray
.
zeros
((
2
,
3
)))
CudaNdarray'''
a
=
cuda
.
shared_constructor
(
cuda
.
CudaNdarray
.
zeros
((
2
,
3
)))
assert
isinstance
(
a
.
type
,
tcn
.
CudaNdarrayType
)
assert
isinstance
(
a
.
type
,
tcn
.
CudaNdarrayType
)
...
@@ -987,38 +1105,38 @@ class test_size(unittest.TestCase):
...
@@ -987,38 +1105,38 @@ class test_size(unittest.TestCase):
import
theano.tensor.tests.test_sharedvar
import
theano.tensor.tests.test_sharedvar
#This tests the case when the shared constructor views a CudaNdarray as input
#This tests the case when the shared constructor views a CudaNdarray as input
test_shared_options
=
theano
.
tensor
.
tests
.
test_sharedvar
.
makeSharedTester
(
test_shared_options
=
theano
.
tensor
.
tests
.
test_sharedvar
.
makeSharedTester
(
shared_constructor_
=
tcn
.
shared_constructor
,
shared_constructor_
=
tcn
.
shared_constructor
,
dtype_
=
'float32'
,
dtype_
=
'float32'
,
get_value_borrow_true_alias_
=
True
,
get_value_borrow_true_alias_
=
True
,
shared_borrow_true_alias_
=
True
,
#True when the original value is already a CudaNdarray!
shared_borrow_true_alias_
=
True
,
#True when the original value is already a CudaNdarray!
set_value_borrow_true_alias_
=
True
,
set_value_borrow_true_alias_
=
True
,
set_value_inplace_
=
True
,
set_value_inplace_
=
True
,
set_cast_value_inplace_
=
False
,
set_cast_value_inplace_
=
False
,
shared_constructor_accept_ndarray_
=
True
,
shared_constructor_accept_ndarray_
=
True
,
internal_type_
=
cuda_ndarray
.
CudaNdarray
,
internal_type_
=
cuda_ndarray
.
CudaNdarray
,
test_internal_type_
=
lambda
a
:
isinstance
(
a
,
cuda_ndarray
.
CudaNdarray
),
test_internal_type_
=
lambda
a
:
isinstance
(
a
,
cuda_ndarray
.
CudaNdarray
),
theano_fct_
=
theano
.
tensor
.
exp
,
theano_fct_
=
theano
.
tensor
.
exp
,
ref_fct_
=
numpy
.
exp
,
ref_fct_
=
numpy
.
exp
,
cast_value_
=
cuda
.
as_cuda_array
,
cast_value_
=
cuda
.
as_cuda_array
,
op_by_matrix_
=
True
,
op_by_matrix_
=
True
,
name
=
'test_shared_options'
)
name
=
'test_shared_options'
)
#This tests the case when the shared constructor views an ndarray as input
#This tests the case when the shared constructor views an ndarray as input
test_shared_options2
=
theano
.
tensor
.
tests
.
test_sharedvar
.
makeSharedTester
(
test_shared_options2
=
theano
.
tensor
.
tests
.
test_sharedvar
.
makeSharedTester
(
shared_constructor_
=
tcn
.
shared_constructor
,
shared_constructor_
=
tcn
.
shared_constructor
,
dtype_
=
'float32'
,
dtype_
=
'float32'
,
get_value_borrow_true_alias_
=
False
,
get_value_borrow_true_alias_
=
False
,
shared_borrow_true_alias_
=
False
,
shared_borrow_true_alias_
=
False
,
set_value_borrow_true_alias_
=
False
,
set_value_borrow_true_alias_
=
False
,
set_value_inplace_
=
True
,
set_value_inplace_
=
True
,
set_cast_value_inplace_
=
True
,
set_cast_value_inplace_
=
True
,
shared_constructor_accept_ndarray_
=
True
,
shared_constructor_accept_ndarray_
=
True
,
internal_type_
=
cuda_ndarray
.
CudaNdarray
,
internal_type_
=
cuda_ndarray
.
CudaNdarray
,
test_internal_type_
=
lambda
a
:
isinstance
(
a
,
cuda_ndarray
.
CudaNdarray
),
test_internal_type_
=
lambda
a
:
isinstance
(
a
,
cuda_ndarray
.
CudaNdarray
),
theano_fct_
=
theano
.
tensor
.
exp
,
theano_fct_
=
theano
.
tensor
.
exp
,
ref_fct_
=
numpy
.
exp
,
ref_fct_
=
numpy
.
exp
,
cast_value_
=
numpy
.
asarray
,
cast_value_
=
numpy
.
asarray
,
op_by_matrix_
=
True
,
op_by_matrix_
=
True
,
name
=
'test_shared_options'
)
name
=
'test_shared_options'
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
...
...
theano/sandbox/cuda/tests/test_driver.py
0 → 100644
浏览文件 @
8aa08ca2
import
numpy
import
theano
# Skip test if cuda_ndarray is not available.
from
nose.plugins.skip
import
SkipTest
import
theano.sandbox.cuda
as
cuda_ndarray
# Skip every test in this module when the cuda package reports that it is
# not available.
if not cuda_ndarray.cuda_available:
    raise SkipTest('Optional package cuda disabled')
import
theano.sandbox.cuda
as
cuda
import
theano.sandbox.cuda.basic_ops
as
B
# Pick the compilation mode used by the tests below.  When the configured
# mode is FAST_COMPILE, FAST_RUN is used as the base instead of the default
# mode; in both cases 'gpu' is included.
if theano.config.mode != 'FAST_COMPILE':
    mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
else:
    mode_with_gpu = theano.compile.mode.get_mode('FAST_RUN').including('gpu')
def test_nvidia_driver1():
    """Check that a sum reduction run on the gpu matches numpy.

    Some nvidia drivers give bad results for reductions.  This builds a
    10000-element float32 shared variable with ``cuda.shared_constructor``,
    compiles its sum with ``mode_with_gpu`` and compares the result with
    the sum computed by numpy.

    Raises an Exception carrying installation advice when the two sums
    are not close.
    """
    a = numpy.random.rand(10000).astype("float32")
    A = cuda.shared_constructor(a)
    f = theano.function(inputs=[], outputs=A.sum(), mode=mode_with_gpu)
    topo = f.maker.env.toposort()
    # The compiled graph is expected to hold exactly two nodes, exactly one
    # of which is a GpuSum.
    assert len(topo) == 2
    assert sum(isinstance(node.op, B.GpuSum) for node in topo) == 1
    if not numpy.allclose(f(), a.sum()):
        # Note the trailing space after "reduction.": the adjacent string
        # literals are concatenated, so without it the two sentences of the
        # message run together.
        raise Exception("The nvidia driver version installed with the OS "
                        "don't give good result for reduction. "
                        "Installing the nvidia driver available on the same "
                        "download page as the cuda package will fix the "
                        "problem: http://developer.nvidia.com/cuda-downloads")
def test_nvidia_driver2():
    """Creating a gpu shared variable by hand must initialize the device.

    After building a shared variable with ``cuda.shared_constructor``,
    ``theano.sandbox.cuda.use.device_number`` must no longer be None.
    The driver should always be tested during theano initialization of
    the gpu device.
    """
    host_values = numpy.random.rand(10000).astype("float32")
    cuda.shared_constructor(host_values)
    device_number = theano.sandbox.cuda.use.device_number
    assert device_number is not None
def test_nvidia_driver3():
    """Building a function with a gpu op must initialize the device.

    Compiles ``var + 1`` on a ``cuda.fvector`` with ``mode_with_gpu``,
    checks that the graph contains a GpuElemwise node, and then checks
    that ``theano.sandbox.cuda.use.device_number`` is no longer None.
    The driver should always be tested during theano initialization of
    the gpu device.
    """
    x = cuda.fvector()
    fn = theano.function([x], x + 1, mode=mode_with_gpu)
    nodes = fn.maker.env.toposort()
    is_gpu_elemwise = [isinstance(n.op, cuda.GpuElemwise) for n in nodes]
    assert any(is_gpu_elemwise)
    assert theano.sandbox.cuda.use.device_number is not None
# TODO: make sure the test_nvidia_driver tests are executed when we manually
# create a CudaNdarray like this: cuda.CudaNdarray.zeros((5,4))
theano/sandbox/cuda/var.py
浏览文件 @
8aa08ca2
...
@@ -169,6 +169,12 @@ def cuda_shared_constructor(value, name=None, strict=False,
...
@@ -169,6 +169,12 @@ def cuda_shared_constructor(value, name=None, strict=False,
def
float32_shared_constructor
(
value
,
name
=
None
,
strict
=
False
,
def
float32_shared_constructor
(
value
,
name
=
None
,
strict
=
False
,
allow_downcast
=
None
,
borrow
=
False
,
broadcastable
=
None
):
allow_downcast
=
None
,
borrow
=
False
,
broadcastable
=
None
):
"""SharedVariable Constructor for CudaNdarrayType from numpy.ndarray or CudaNdarray"""
"""SharedVariable Constructor for CudaNdarrayType from numpy.ndarray or CudaNdarray"""
if
theano
.
sandbox
.
cuda
.
use
.
device_number
is
None
:
theano
.
sandbox
.
cuda
.
use
(
"gpu"
,
force
=
True
,
default_to_move_computation_to_gpu
=
False
,
move_shared_float32_to_gpu
=
False
,
enable_cuda
=
False
)
# if value isn't a float32 ndarray, or a CudaNdarray then raise
# if value isn't a float32 ndarray, or a CudaNdarray then raise
...
...
theano/sandbox/multinomial.py
浏览文件 @
8aa08ca2
...
@@ -5,7 +5,7 @@ from theano.gof import local_optimizer
...
@@ -5,7 +5,7 @@ from theano.gof import local_optimizer
from
theano.sandbox.cuda
import
cuda_available
from
theano.sandbox.cuda
import
cuda_available
if
cuda_available
:
if
cuda_available
:
from
theano.sandbox.cuda
import
CudaNdarrayType
from
theano.sandbox.cuda
import
CudaNdarrayType
,
GpuOp
from
theano.sandbox.cuda.basic_ops
import
host_from_gpu
,
gpu_from_host
from
theano.sandbox.cuda.basic_ops
import
host_from_gpu
,
gpu_from_host
from
theano.sandbox.cuda.opt
import
register_opt
from
theano.sandbox.cuda.opt
import
register_opt
...
@@ -120,7 +120,7 @@ class MultinomialFromUniform(Op):
...
@@ -120,7 +120,7 @@ class MultinomialFromUniform(Op):
"""
%
locals
()
"""
%
locals
()
class
GpuMultinomialFromUniform
(
MultinomialFromUniform
):
class
GpuMultinomialFromUniform
(
MultinomialFromUniform
,
GpuOp
):
"""
"""
The output is transposed compared to MultinomialFromUniform.
The output is transposed compared to MultinomialFromUniform.
We must insert a Transpose op after it.
We must insert a Transpose op after it.
...
...
theano/sandbox/neighbours.py
浏览文件 @
8aa08ca2
...
@@ -5,7 +5,7 @@ from theano.gof import local_optimizer
...
@@ -5,7 +5,7 @@ from theano.gof import local_optimizer
from
theano.sandbox.cuda
import
cuda_available
from
theano.sandbox.cuda
import
cuda_available
if
cuda_available
:
if
cuda_available
:
from
theano.sandbox.cuda
import
CudaNdarrayType
from
theano.sandbox.cuda
import
CudaNdarrayType
,
GpuOp
from
theano.sandbox.cuda.basic_ops
import
host_from_gpu
,
gpu_from_host
from
theano.sandbox.cuda.basic_ops
import
host_from_gpu
,
gpu_from_host
from
theano.sandbox.cuda.opt
import
register_opt
as
register_gpu_opt
from
theano.sandbox.cuda.opt
import
register_opt
as
register_gpu_opt
...
@@ -292,7 +292,7 @@ def neibs2images(neibs, neib_shape, original_shape, mode='valid'):
...
@@ -292,7 +292,7 @@ def neibs2images(neibs, neib_shape, original_shape, mode='valid'):
# This is work in progress
# This is work in progress
class
GpuImages2Neibs
(
Images2Neibs
):
class
GpuImages2Neibs
(
Images2Neibs
,
GpuOp
):
def
__init__
(
self
,
mode
=
'valid'
):
def
__init__
(
self
,
mode
=
'valid'
):
if
mode
not
in
[
'valid'
,
'wrap_centered'
]:
if
mode
not
in
[
'valid'
,
'wrap_centered'
]:
raise
NotImplementedError
(
"Only the mode valid and wrap_centered"
raise
NotImplementedError
(
"Only the mode valid and wrap_centered"
...
...
theano/sandbox/rng_mrg.py
浏览文件 @
8aa08ca2
...
@@ -20,7 +20,10 @@ import multinomial
...
@@ -20,7 +20,10 @@ import multinomial
from
theano.sandbox.cuda
import
cuda_available
,
cuda_enabled
from
theano.sandbox.cuda
import
cuda_available
,
cuda_enabled
if
cuda_available
:
if
cuda_available
:
from
theano.sandbox.cuda
import
CudaNdarrayType
,
float32_shared_constructor
from
theano.sandbox.cuda
import
(
CudaNdarrayType
,
float32_shared_constructor
,
GpuOp
)
def
mulmod
(
a
,
b
,
c
,
m
):
def
mulmod
(
a
,
b
,
c
,
m
):
r
=
numpy
.
int32
((
numpy
.
int64
(
a
)
*
b
+
c
)
%
m
)
r
=
numpy
.
int32
((
numpy
.
int64
(
a
)
*
b
+
c
)
%
m
)
...
@@ -372,7 +375,7 @@ class mrg_uniform(mrg_uniform_base):
...
@@ -372,7 +375,7 @@ class mrg_uniform(mrg_uniform_base):
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
1
,)
return
(
1
,)
class
GPU_mrg_uniform
(
mrg_uniform_base
):
class
GPU_mrg_uniform
(
mrg_uniform_base
,
GpuOp
):
#GPU VERSION
#GPU VERSION
@classmethod
@classmethod
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论