Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
8bd900f8
提交
8bd900f8
authored
10月 31, 2013
作者:
Frédéric Bastien
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1582 from abergeron/compyte2-rb
Compyte2 rebase
上级
382d2ed1
217b616b
隐藏空白字符变更
内嵌
并排
正在显示
22 个修改的文件
包含
1353 行增加
和
505 行删除
+1353
-505
test_basic_ops.py
theano/sandbox/cuda/tests/test_basic_ops.py
+1
-1
__init__.py
theano/sandbox/gpuarray/__init__.py
+8
-1
basic_ops.py
theano/sandbox/gpuarray/basic_ops.py
+156
-96
blas.py
theano/sandbox/gpuarray/blas.py
+150
-0
elemwise.py
theano/sandbox/gpuarray/elemwise.py
+290
-77
opt.py
theano/sandbox/gpuarray/opt.py
+151
-63
subtensor.py
theano/sandbox/gpuarray/subtensor.py
+156
-0
test_basic_ops.py
theano/sandbox/gpuarray/tests/test_basic_ops.py
+112
-134
test_blas.py
theano/sandbox/gpuarray/tests/test_blas.py
+35
-0
test_elemwise.py
theano/sandbox/gpuarray/tests/test_elemwise.py
+58
-0
test_opt.py
theano/sandbox/gpuarray/tests/test_opt.py
+37
-0
test_subtensor.py
theano/sandbox/gpuarray/tests/test_subtensor.py
+26
-0
test_type.py
theano/sandbox/gpuarray/tests/test_type.py
+20
-0
type.py
theano/sandbox/gpuarray/type.py
+21
-23
basic.py
theano/scalar/basic.py
+1
-1
elemwise.py
theano/tensor/elemwise.py
+4
-20
opt.py
theano/tensor/opt.py
+7
-7
test_basic.py
theano/tensor/tests/test_basic.py
+43
-24
test_elemwise.py
theano/tensor/tests/test_elemwise.py
+67
-49
test_subtensor.py
theano/tensor/tests/test_subtensor.py
+4
-6
var.py
theano/tensor/var.py
+2
-2
unittest_tools.py
theano/tests/unittest_tools.py
+4
-1
没有找到文件。
theano/sandbox/cuda/tests/test_basic_ops.py
浏览文件 @
8bd900f8
...
...
@@ -932,7 +932,7 @@ class T_subtensor(theano.tensor.tests.test_subtensor.T_subtensor):
adv_incsub1
=
cuda
.
GpuAdvancedIncSubtensor1
mode
=
mode_with_gpu
dtype
=
'float32'
ignore_topo
=
(
B
.
HostFromGpu
,
B
.
GpuFromHost
)
ignore_topo
=
(
B
.
HostFromGpu
,
B
.
GpuFromHost
,
theano
.
compile
.
DeepCopyOp
)
fast_compile
=
False
ops
=
(
cuda
.
GpuSubtensor
,
cuda
.
GpuIncSubtensor
,
cuda
.
GpuAdvancedSubtensor1
,
cuda
.
GpuAdvancedIncSubtensor1
)
...
...
theano/sandbox/gpuarray/__init__.py
浏览文件 @
8bd900f8
import
logging
import
theano
from
theano.configparser
import
config
from
theano.configparser
import
config
,
AddConfigVar
,
BoolParam
from
theano.compile
import
optdb
_logger_name
=
'theano.sandbox.gpuarray'
...
...
@@ -18,6 +18,13 @@ try:
except
ImportError
:
pygpu
=
None
AddConfigVar
(
'gpuarray.sync'
,
"""If True, every op will make sure its work is done before
returning. Setting this to True will slow down execution,
but give much more accurate results in profiling."""
,
BoolParam
(
False
),
in_c_key
=
True
)
# This is for documentation not to depend on the availability of pygpu
from
type
import
(
GpuArrayType
,
GpuArrayVariable
,
GpuArrayConstant
,
GpuArraySharedVariable
,
gpuarray_shared_constructor
)
...
...
theano/sandbox/gpuarray/basic_ops.py
浏览文件 @
8bd900f8
...
...
@@ -6,8 +6,10 @@ import theano
from
theano
import
Op
,
Type
,
Apply
,
Variable
,
Constant
from
theano
import
tensor
,
scalar
,
config
from
theano.scalar
import
Scalar
from
theano.tensor.basic
import
Alloc
from
theano.gof.python25
import
all
,
any
from
theano.gof.utils
import
MethodNotDefined
try
:
import
pygpu
...
...
@@ -17,6 +19,7 @@ except ImportError:
from
type
import
GpuArrayType
def
as_gpuarray_variable
(
x
):
if
hasattr
(
x
,
'_as_GpuArrayVariable'
):
return
x
.
_as_GpuArrayVariable
()
...
...
@@ -29,6 +32,32 @@ def as_gpuarray(x):
return
gpuarray
.
array
(
x
,
copy
=
False
)
class
HideC
(
object
):
def
__hide
(
*
args
):
raise
MethodNotDefined
()
c_code
=
__hide
c_code_cleanup
=
__hide
c_headers
=
__hide
c_header_dirs
=
__hide
c_libraries
=
__hide
c_lib_dirs
=
__hide
c_support_code
=
__hide
c_support_code_apply
=
__hide
c_compile_args
=
__hide
c_no_compile_args
=
__hide
c_init_code
=
__hide
def
c_code_cache_version
(
self
):
return
()
def
c_code_cache_version_apply
(
self
,
node
):
return
self
.
c_code_cache_version
()
class
HostFromGpu
(
Op
):
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
...
...
@@ -66,7 +95,7 @@ class HostFromGpu(Op):
} else {
%(name)
s_ga = &
%(inp)
s->ga;
}
%(name)
s_dtype = typecode_to_dtype(
%(
inp)
s->ga.
typecode);
%(name)
s_dtype = typecode_to_dtype(
%(
name)
s_ga->
typecode);
Py_XDECREF(
%(out)
s);
// PyArray_Empty below steals a reference to the dtype we pass it
// so we need an extra one to spare.
...
...
@@ -89,7 +118,7 @@ class HostFromGpu(Op):
%(fail)
s
}
"""
%
{
'name'
:
name
,
'fail'
:
sub
[
'fail'
],
'inp'
:
inputs
[
0
],
'out'
:
outputs
[
0
]}
'out'
:
outputs
[
0
]}
def
c_code_cache_version
(
self
):
return
(
1
,)
...
...
@@ -97,7 +126,7 @@ class HostFromGpu(Op):
def
grad
(
self
,
inputs
,
grads
):
gz
,
=
grads
return
[
gpu_from_host
(
gz
)]
def
R_op
(
self
,
inputs
,
eval_points
):
ev
,
=
eval_points
if
isinstance
(
ev
,
tensor
.
TensorType
):
...
...
@@ -150,48 +179,22 @@ class GpuFromHost(Op):
def
c_code
(
self
,
node
,
name
,
inputs
,
outputs
,
sub
):
return
"""
PyArrayObject *
%(name)
s_tmp;
int
%(name)
serr;
%(name)
s_tmp = PyArray_GETCONTIGUOUS(
%(inp)
s);
if (
%(name)
s_tmp == NULL) {
// PyArray_GETCONTIGUOUS sets an error message if it fails
%(fail)
s
}
Py_XDECREF(
%(out)
s);
%(out)
s = new_GpuArray((PyObject *)&GpuArrayType, GpuArray_default_context());
if (
%(out)
s == NULL) {
Py_DECREF(
%(name)
s_tmp);
// new_GpuArray calls __new__ which will set an error message
// if it returns NULL.
%(fail)
s
}
%(name)
serr = GpuArray_empty(&
%(out)
s->ga,
GpuArray_default_context()->ops,
GpuArray_default_context()->ctx,
get_typecode((PyObject *)PyArray_DESCR(
%(name)
s_tmp)),
%(out)
s = pygpu_fromhostdata(PyArray_DATA(
%(inp)
s),
get_typecode((PyObject *)PyArray_DESCR(
%(inp)
s)),
PyArray_NDIM(
%(inp)
s),
(size_t *)PyArray_DIMS(
%(inp)
s),
GA_C_ORDER);
if (
%(name)
serr != GA_NO_ERROR) {
Py_DECREF(
%(name)
s_tmp);
Py_DECREF(
%(out)
s);
%(out)
s = NULL;
PyErr_SetString(PyExc_MemoryError, "Can't allocate device memory for result.");
%(fail)
s
}
%(name)
serr = GpuArray_write(&
%(out)
s->ga, PyArray_DATA(
%(name)
s_tmp),
PyArray_NBYTES(
%(name)
s_tmp));
Py_DECREF(
%(name)
s_tmp);
if (
%(name)
serr != GA_NO_ERROR) {
Py_DECREF(
%(out)
s);
PyErr_SetString(PyExc_RuntimeError, "Could not copy array data to device");
(ssize_t *)PyArray_STRIDES(
%(inp)
s),
pygpu_default_context(),
Py_None);
if (
%(out)
s == NULL) {
%(fail)
s
}
"""
%
{
'name'
:
name
,
'inp'
:
inputs
[
0
],
'out'
:
outputs
[
0
],
'fail'
:
sub
[
'fail'
]}
def
c_code_cache_version
(
self
):
return
(
1
,)
return
(
4
,)
gpu_from_host
=
GpuFromHost
()
...
...
@@ -276,7 +279,7 @@ class GpuFromCuda(Op):
ssize_t *
%(name)
sstr;
cuCtxGetCurrent(&
%(name)
scur);
if (
%(name)
scur != cuda_get_ctx(
GpuArray
_default_context()->ctx)) {
if (
%(name)
scur != cuda_get_ctx(
pygpu
_default_context()->ctx)) {
PyErr_SetString(PyExc_ValueError, "Ambient cuda context is not the same as output context.");
%(fail)
s
}
...
...
@@ -297,15 +300,7 @@ class GpuFromCuda(Op):
%(name)
sstr[i] = (ssize_t)CudaNdarray_HOST_STRIDES(
%(in)
s)[i]*4;
}
Py_XDECREF(
%(out)
s);
%(out)
s = new_GpuArray((PyObject *)&GpuArrayType, GpuArray_default_context());
if (
%(out)
s == NULL) {
free(
%(name)
sdims);
free(
%(name)
sstr);
%(fail)
s
}
%(name)
sdata = cuda_make_buf(GpuArray_default_context()->ctx,
%(name)
sdata = cuda_make_buf(pygpu_default_context()->ctx,
(CUdeviceptr)
%(in)
s->devdata,
((size_t)
%(in)
s->data_allocated)*4);
if (
%(name)
sdata == NULL) {
...
...
@@ -315,24 +310,23 @@ class GpuFromCuda(Op):
PyErr_SetString(PyExc_MemoryError, "Could not allocate gpudata structure.");
%(fail)
s
}
%(name)
serr = GpuArray_fromdata(&
%(out)
s->ga,
GpuArray_default_context()->ops,
%(name)
sdata, 0, GA_FLOAT,
%(in)
s->nd,
%(name)
sdims,
%(name)
sstr, 1);
Py_XDECREF(
%(out)
s);
%(out)
s = pygpu_fromgpudata(
%(name)
sdata, 0, GA_FLOAT,
%(in)
s->nd,
%(name)
sdims,
%(name)
sstr,
pygpu_default_context(), 1,
(PyObject *)
%(in)
s,
(PyObject *)&PyGpuArrayType);
pygpu_default_context()->ops->buffer_release(
%(name)
sdata);
free(
%(name)
sdims);
free(
%(name)
sstr);
if (
%(name)
serr != GA_NO_ERROR) {
Py_DECREF(
%(out)
s);
PyErr_SetString(PyExc_MemoryError, "Could not allocate GpuArray structure.");
if (
%(out)
s == NULL) {
%(fail)
s
}
Py_INCREF(
%(in)
s);
%(out)
s->base = (PyObject *)
%(in)
s;
"""
%
{
'name'
:
name
,
'in'
:
inputs
[
0
],
'out'
:
outputs
[
0
],
"""
%
{
'name'
:
name
,
'in'
:
inputs
[
0
],
'out'
:
outputs
[
0
],
'fail'
:
sub
[
'fail'
]}
def
c_code_cache_version
(
self
):
return
(
1
,)
return
(
5
,)
gpu_from_cuda
=
GpuFromCuda
()
...
...
@@ -417,11 +411,15 @@ class CudaFromGpu(Op):
CUcontext
%(name)
scur;
cuCtxGetCurrent(&
%(name)
scur);
if (
%(name)
scur != cuda_get_ctx(
GpuArray
_default_context()->ctx)) {
if (
%(name)
scur != cuda_get_ctx(
pygpu
_default_context()->ctx)) {
PyErr_SetString(PyExc_ValueError, "Ambient cuda context is not the same as output context.");
%(fail)
s
}
if (GpuArray_sync(&
%(inp)
s->ga) != GA_NO_ERROR) {
PyErr_SetString(PyExc_RuntimeError, "Could not sync GpuArray");
%(fail)
s
}
Py_XDECREF(
%(out)
s);
%(out)
s = (CudaNdarray *)CudaNdarray_new_nd(
%(inp)
s->ga.nd);
if (!
%(out)
s) {
...
...
@@ -441,61 +439,123 @@ class CudaFromGpu(Op):
'fail'
:
sub
[
'fail'
]}
def
c_code_cache_version
(
self
):
return
(
1
,)
return
(
3
,)
cuda_from_gpu
=
CudaFromGpu
()
class
GpuAlloc
(
Op
):
class
GpuAlloc
(
HideC
,
Alloc
):
def
__str__
(
self
):
return
'GpuAlloc'
def
__hash__
(
self
):
return
hash
(
type
(
self
))
def
__eq__
(
self
,
other
):
return
type
(
self
)
==
type
(
other
)
def
make_node
(
self
,
value
,
*
shape
):
v
=
as_gpuarray_variable
(
value
)
sh
=
[
tensor
.
as_tensor_variable
(
s
)
for
s
in
shape
]
bcast
=
[]
if
v
.
ndim
>
len
(
shape
):
raise
TypeError
(
'GpuAlloc value has more dimensions than arguments'
,
value
.
ndim
,
len
(
shape
))
for
i
,
s
in
enumerate
(
sh
):
if
s
.
type
.
dtype
[:
3
]
not
in
(
'int'
,
'uint'
):
raise
TypeError
(
'Shape arguments must be integers'
,
s
)
try
:
const_shp
=
tensor
.
get_scalar_constant_value
(
s
)
except
tensor
.
NotScalarConstantError
:
const_shp
=
None
bcast
.
append
(
numpy
.
all
(
1
==
const_shp
))
otype
=
GpuArrayType
(
dtype
=
v
.
dtype
,
broadcastable
=
bcast
)
return
Apply
(
self
,
[
v
]
+
sh
,
[
otype
()])
res
=
Alloc
.
make_node
(
self
,
value
,
*
shape
)
value
=
as_gpuarray_variable
(
value
)
otype
=
GpuArrayType
(
dtype
=
res
.
outputs
[
0
]
.
dtype
,
broadcastable
=
res
.
outputs
[
0
]
.
broadcastable
)
return
Apply
(
self
,
[
value
]
+
res
.
inputs
[
1
:],
[
otype
()])
def
perform
(
self
,
node
,
inputs
,
outs
):
out
,
=
outs
v
=
inputs
[
0
]
sh
=
tuple
(
map
(
int
,
inputs
[
1
:]))
if
out
[
0
]
is
None
or
out
[
0
]
.
shape
!=
sh
:
out
[
0
]
=
gpuarray
.
empty
(
sh
,
dtype
=
v
.
dtype
)
out
[
0
][
...
]
=
v
if
v
.
size
==
1
and
numpy
.
asarray
(
v
)[
0
]
.
item
()
==
0
:
out
[
0
]
=
gpuarray
.
zeros
(
sh
,
dtype
=
v
.
dtype
)
else
:
out
[
0
]
=
gpuarray
.
empty
(
sh
,
dtype
=
v
.
dtype
)
out
[
0
][
...
]
=
v
else
:
out
[
0
][
...
]
=
v
if
config
.
gpuarray
.
sync
:
out
[
0
]
.
sync
()
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
vv
=
inp
[
0
]
ndim
=
len
(
inp
[
1
:])
zz
,
=
out
code
=
"""
int i;
size_t
%(name)
s_shape[
%(ndim)
s];
"""
%
dict
(
name
=
name
,
ndim
=
ndim
)
for
i
,
shp_i
in
enumerate
(
inp
[
1
:]):
code
+=
"""
%(name)
s_shape[
%(i)
s] = ((dtype_
%(shp_i)
s *)PyArray_DATA(
%(shp_i)
s))[0];
"""
%
dict
(
name
=
name
,
i
=
i
,
shp_i
=
shp_i
)
code
+=
"""
int need_new_out = (NULL ==
%(zz)
s ||
%(zz)
s->ga.nd !=
%(ndim)
s);
if (!need_new_out)
for (i = 0; i <
%(ndim)
s; i++)
need_new_out |=
%(zz)
s->ga.dimensions[i] !=
%(name)
s_shape[i];
if (need_new_out) {
Py_XDECREF(
%(zz)
s);
%(zz)
s = pygpu_empty(
%(ndim)
s,
%(name)
s_shape,
%(vv)
s->ga.typecode, GA_C_ORDER,
pygpu_default_context(), Py_None);
if (!
%(zz)
s) {
%(fail)
s
}
}
def
infer_shape
(
self
,
node
,
input_shapes
):
return
[
node
.
inputs
[
1
:]]
if (GpuArray_setarray(&
%(zz)
s->ga, &
%(vv)
s->ga) != GA_NO_ERROR) {
PyErr_SetString(PyExc_ValueError, "setarray failed");
%(fail)
s
}
"""
%
dict
(
name
=
name
,
ndim
=
ndim
,
zz
=
zz
,
vv
=
vv
,
fail
=
sub
[
'fail'
])
def
grad
(
self
,
input
,
grads
)
:
return
[
None
for
i
in
inputs
]
if
config
.
gpuarray
.
sync
:
code
+=
"GpuArray_sync(&
%(zz)
s->ga);"
%
dict
(
zz
=
zz
)
def
do_constant_folding
(
self
,
node
):
if
not
getattr
(
node
.
ouputs
[
0
],
'clients'
,
[]):
return
False
for
client
in
node
.
outputs
[
0
]
.
clients
:
if
client
[
0
]
==
'output'
:
return
False
return
True
return
code
def
c_code_cache_version
(
self
):
return
(
1
,)
gpu_alloc
=
GpuAlloc
()
class
GpuReshape
(
HideC
,
tensor
.
Reshape
):
"""
Implement Reshape on the gpu.
"""
# __hash__, __eq__, __str__ come from tensor.Reshape
def
make_node
(
self
,
x
,
shp
):
x
=
as_gpuarray_variable
(
x
)
res
=
host_from_gpu
(
x
)
.
reshape
(
shp
,
ndim
=
self
.
ndim
)
otype
=
GpuArrayType
(
dtype
=
res
.
dtype
,
broadcastable
=
res
.
broadcastable
)
return
Apply
(
self
,
[
x
,
shp
],
[
otype
()])
def
perform
(
self
,
node
,
inp
,
out_
):
x
,
shp
=
inp
out
,
=
out_
if
(
len
(
shp
)
!=
self
.
ndim
):
raise
ValueError
(
'shape argument to GpuReshape.perform'
' has incorrect length
%
i'
', should be
%
i'
%
(
len
(
shp
),
self
.
ndim
),
shp
)
s
=
shp
.
prod
()
if
shp
.
prod
()
!=
x
.
size
:
# We need to do check here to raise the same error as NumPy.
# We should make pygpu do the same.
ss
=
1
nb_m1
=
0
for
i
in
shp
:
if
i
==
-
1
:
nb_m1
+=
1
else
:
ss
*=
i
if
nb_m1
>
1
:
raise
ValueError
(
"Only one -1 is accepted in the new shape"
)
elif
nb_m1
==
1
:
if
(
x
.
size
%
ss
)
!=
0
:
raise
ValueError
(
"When using -1 in new shape, the computed new shape must be an multiple of the original shape."
)
else
:
raise
ValueError
(
"total size of new array must be unchanged"
)
out
[
0
]
=
x
.
reshape
(
tuple
(
shp
))
theano/sandbox/gpuarray/blas.py
0 → 100644
浏览文件 @
8bd900f8
from
theano
import
Op
,
Apply
,
config
from
theano.tensor.blas
import
Gemv
,
Gemm
from
theano.sandbox.gpuarray.basic_ops
import
(
HideC
,
as_gpuarray_variable
)
try
:
import
pygpu
from
pygpu
import
blas
except
ImportError
,
e
:
# To make sure theano is importable
pass
class
BlasOp
(
HideC
):
def
c_headers
(
self
):
return
[
'<blas_api.h>'
]
def
c_header_dirs
(
self
):
return
[
pygpu
.
get_include
()]
def
c_init_code
(
self
):
return
[
'import_pygpu__blas();'
]
class
GpuGemv
(
BlasOp
,
Gemv
):
def
make_node
(
self
,
y
,
alpha
,
A
,
x
,
beta
):
res
=
Gemv
.
make_node
(
self
,
y
,
alpha
,
A
,
x
,
beta
)
A
=
as_gpuarray_variable
(
A
)
x
=
as_gpuarray_variable
(
x
)
y
=
as_gpuarray_variable
(
y
)
return
Apply
(
self
,
[
y
,
alpha
,
A
,
x
,
beta
],
[
y
.
type
()])
def
perform
(
self
,
node
,
inputs
,
out_storage
):
y
,
alpha
,
A
,
x
,
beta
=
inputs
out_storage
[
0
][
0
]
=
blas
.
gemv
(
alpha
,
A
,
x
,
beta
,
y
,
trans
=
False
,
overwrite_y
=
self
.
inplace
)
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
vars
=
dict
(
out
=
out
[
0
],
y
=
inp
[
0
],
alpha
=
inp
[
1
],
A
=
inp
[
2
],
x
=
inp
[
3
],
beta
=
inp
[
4
],
fail
=
sub
[
'fail'
],
name
=
name
)
if
self
.
inplace
:
code
=
"""
Py_XDECREF(
%(out)
s);
%(out)
s =
%(y)
s;
Py_INCREF(
%(out)
s);
"""
%
vars
else
:
code
=
"""
Py_XDECREF(
%(out)
s);
%(out)
s = pygpu_copy(
%(y)
s, GA_ANY_ORDER);
if (
%(out)
s == NULL) {
%(fail)
s
}
"""
%
vars
code
+=
"""
if (pygpu_blas_rgemv(cb_no_trans,
((dtype_
%(alpha)
s *)PyArray_DATA(
%(alpha)
s))[0],
%(A)
s,
%(x)
s,
((dtype_
%(beta)
s *)PyArray_DATA(
%(beta)
s))[0],
%(out)
s) == NULL) {
%(fail)
s
}
"""
%
vars
if
config
.
gpuarray
.
sync
:
code
+=
"""
GpuArray_sync(&
%(out)
s->ga);
"""
return
code
def
c_code_cache_version
(
self
):
return
(
0
,)
gpugemv_no_inplace
=
GpuGemv
(
inplace
=
False
)
gpugemv_inplace
=
GpuGemv
(
inplace
=
True
)
class
GpuGemm
(
BlasOp
,
Gemm
):
def
make_node
(
self
,
C
,
alpha
,
A
,
B
,
beta
):
res
=
Gemm
.
make_node
(
self
,
C
,
alpha
,
A
,
B
,
beta
)
A
=
as_gpuarray_variable
(
A
)
B
=
as_gpuarray_variable
(
B
)
C
=
as_gpuarray_variable
(
C
)
return
Apply
(
self
,
[
C
,
alpha
,
A
,
B
,
beta
],
[
C
.
type
()])
def
perform
(
self
,
node
,
inputs
,
outputs
):
C
,
alpha
,
A
,
B
,
beta
=
inputs
outputs
[
0
][
0
]
=
blas
.
gemm
(
alpha
,
A
,
B
,
beta
,
C
,
overwrite_c
=
self
.
inplace
)
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
vars
=
dict
(
out
=
out
[
0
],
C
=
inp
[
0
],
alpha
=
inp
[
1
],
A
=
inp
[
2
],
B
=
inp
[
3
],
beta
=
inp
[
4
],
fail
=
sub
[
'fail'
],
name
=
name
)
if
self
.
inplace
:
code
=
"""
Py_XDECREF(
%(out)
s);
%(out)
s =
%(C)
s;
Py_INCREF(
%(out)
s);
"""
%
vars
else
:
code
=
"""
Py_XDECREF(
%(out)
s);
%(out)
s = pygpu_copy(
%(C)
s, GA_ANY_ORDER);
if (
%(out)
s == NULL) {
%(fail)
s
}
"""
%
vars
code
+=
"""
if (pygpu_blas_rgemm(cb_no_trans, cb_no_trans,
((dtype_
%(alpha)
s *)PyArray_DATA(
%(alpha)
s))[0],
%(A)
s,
%(B)
s,
((dtype_
%(beta)
s *)PyArray_DATA(
%(beta)
s))[0],
%(out)
s) == NULL) {
%(fail)
s
}
"""
%
vars
if
config
.
gpuarray
.
sync
:
code
+=
"""
GpuArray_sync(&
%(out)
s->ga);
"""
return
code
def
c_code_cache_version
(
self
):
return
(
0
,)
gpugemm_no_inplace
=
GpuGemm
(
inplace
=
False
)
gpugemm_inplace
=
GpuGemm
(
inplace
=
True
)
from
theano.compile
import
optdb
from
theano.gof
import
local_optimizer
,
LocalOptGroup
from
theano.tensor.opt
import
in2out
@local_optimizer
([
gpugemv_no_inplace
])
def
local_inplace_gpuagemv
(
node
):
if
node
.
op
==
gpugemv_no_inplace
:
return
[
gpugemv_inplace
(
*
node
.
inputs
)]
@local_optimizer
([
gpugemm_no_inplace
])
def
local_inplace_gpuagemm
(
node
):
if
node
.
op
==
gpugemm_no_inplace
:
return
[
gpugemm_inplace
(
*
node
.
inputs
)]
gpuablas_opt_inplace
=
in2out
(
LocalOptGroup
(
local_inplace_gpuagemv
,
local_inplace_gpuagemm
),
name
=
'gpuablas_opt_inplace'
)
optdb
.
register
(
'InplaceGpuaBlasOpt'
,
gpuablas_opt_inplace
,
70.0
,
'fast_run'
,
'inplace'
,
'gpuarray'
)
theano/sandbox/gpuarray/elemwise.py
浏览文件 @
8bd900f8
import
copy
from
itertools
import
izip
import
numpy
from
theano
import
Op
,
Apply
,
scalar
from
theano
import
Op
,
Apply
,
scalar
,
config
from
theano.tensor.elemwise
import
Elemwise
,
DimShuffle
,
CAReduceDtype
try
:
import
pygpu
from
pygpu.tools
import
ScalarArg
,
ArrayArg
from
pygpu.elemwise
import
ElemwiseKernel
from
pygpu.reduction
import
ReductionKernel
except
ImportError
:
pass
from
basic_ops
import
as_gpuarray_variable
from
type
import
GpuArrayType
from
theano.sandbox.gpuarray.basic_ops
import
as_gpuarray_variable
,
HideC
from
t
heano.sandbox.gpuarray.t
ype
import
GpuArrayType
from
theano.gof.utils
import
MethodNotDefined
def
_is_scalar
(
v
):
False
def
make_argument
(
v
,
name
):
if
_is_scalar
(
v
):
return
ScalarArg
(
numpy
.
dtype
(
v
.
type
.
dtype
),
name
)
else
:
return
ArrayArg
(
numpy
.
dtype
(
v
.
type
.
dtype
),
name
)
def
ensure_out
(
o
,
ref
):
if
o
is
None
:
return
ref
.
_empty_like_me
()
else
:
return
o
class
GpuElemwise
(
Op
):
nin
=
property
(
lambda
self
:
self
.
scalar_op
.
nin
)
nout
=
property
(
lambda
self
:
self
.
scalar_op
.
nout
)
def
__init__
(
self
,
scalar_op
):
self
.
scalar_op
=
scalar_op
self
.
destroy_map
=
{}
def
ensure_allocated
(
storage
,
shape
,
dtype
):
odat
=
storage
[
0
]
if
odat
is
not
None
:
if
odat
.
shape
!=
shape
:
# It is unsafe to try to resize odat,
# we have to allocate output storage.
odat
=
None
if
odat
is
None
:
odat
=
pygpu
.
empty
(
shape
,
dtype
=
dtype
)
storage
[
0
]
=
odat
return
odat
def
__getstate__
(
self
):
d
=
copy
.
copy
(
self
.
__dict__
)
d
.
pop
(
'__epydoc_asRoutine'
,
None
)
d
.
pop
(
'_hashval'
)
return
d
def
__setstate__
(
self
,
d
):
self
.
__dict__
.
update
(
d
)
self
.
_rehash
(
)
def
as_C_string_const
(
s
):
return
'
\n
'
.
join
(
'"
%
s
\\
n"'
%
(
l
.
replace
(
'"'
,
'
\\
"'
)
)
for
l
in
s
.
split
(
'
\n
'
)
)
def
__eq__
(
self
,
other
):
return
(
type
(
self
)
==
type
(
other
)
and
self
.
scalar_op
==
other
.
scalar_op
)
def
__hash__
(
self
):
return
hash
(
type
(
self
))
^
hash
(
self
.
scalar_op
)
class
GpuElemwise
(
HideC
,
Elemwise
):
nin
=
property
(
lambda
self
:
self
.
scalar_op
.
nin
)
nout
=
property
(
lambda
self
:
self
.
scalar_op
.
nout
)
def
__str__
(
self
):
return
"GpuElemwise{
%
s}(gpuarray)"
%
(
self
.
scalar_op
,)
if
self
.
name
is
not
None
:
return
self
.
name
items
=
str
(
sorted
(
self
.
inplace_pattern
.
items
()))
return
"GpuElemwise{
%
s}
%
s<gpuarray>"
%
(
self
.
scalar_op
,
items
)
def
make_node
(
self
,
*
inputs
):
_inputs
=
[
as_gpuarray_variable
(
i
)
for
i
in
inputs
]
if
self
.
nin
>
0
and
len
(
_inputs
)
!=
self
.
nin
:
raise
TypeError
(
"Wrong argument count"
,
(
self
.
nin
,
len
(
_inputs
)))
for
i
in
_inputs
[
1
:]:
if
i
.
type
.
ndim
!=
inputs
[
0
]
.
type
.
ndim
:
raise
TypeError
(
'mismatched rank amongst inputs'
)
broadcastable
=
[]
for
d
in
xrange
(
_inputs
[
0
]
.
type
.
ndim
):
bcast_d
=
True
for
i
in
_inputs
:
if
not
i
.
type
.
broadcastable
[
d
]:
bcast_d
=
False
break
broadcastable
.
append
(
bcast_d
)
assert
len
(
broadcastable
)
==
_inputs
[
0
]
.
type
.
ndim
assert
self
.
nout
>
0
res
=
Elemwise
.
make_node
(
self
,
*
inputs
)
outputs
=
[
GpuArrayType
(
broadcastable
=
o
.
type
.
broadcastable
,
dtype
=
o
.
type
.
dtype
)()
for
o
in
res
.
outputs
]
inputs
=
[
as_gpuarray_variable
(
i
)
for
i
in
inputs
]
res
=
Apply
(
self
,
inputs
,
outputs
)
# Try to generate the kernel to catch SupportCodeErrors
k
=
self
.
generate_kernel
(
res
,
'test'
)
return
res
def
generate_kernel
(
self
,
node
,
nodename
):
inps
=
[
make_argument
(
i
,
'i
%
d'
%
(
n
,))
for
n
,
i
in
enumerate
(
inputs
)]
scal_ins
=
[
scalar
.
Scalar
(
i
.
dtype
)
for
i
in
inputs
]
res
=
Apply
(
self
,
_inputs
,
[
GpuArrayType
(
o
.
dtype
,
broadcastable
)()
for
o
in
self
.
scalar_op
.
output_types
(
scal_ins
)])
enumerate
(
node
.
inputs
)]
scal_ins
=
[
scalar
.
Scalar
(
i
.
dtype
)
for
i
in
node
.
inputs
]
outs
=
[
make_argument
(
o
,
'o
%
d'
%
(
n
,))
for
n
,
o
in
enumerate
(
res
.
outputs
)
]
scal_out
=
[
scalar
.
Scalar
(
o
.
dtype
)
for
o
in
res
.
outputs
]
enumerate
(
node
.
outputs
)
if
not
n
in
self
.
inplace_pattern
]
scal_out
=
[
scalar
.
Scalar
(
o
.
dtype
)
for
o
in
node
.
outputs
]
fake_node
=
Apply
(
self
.
scalar_op
,
[
i
()
for
i
in
scal_ins
],
[
o
()
for
o
in
scal_out
])
kcode
=
self
.
scalar_op
.
c_code
(
fake_node
,
'kcode'
,
[
i
.
expr
()
for
i
in
inps
],
[
o
.
expr
()
for
o
in
outs
],
sub
=
dict
(
fail
=
'return;'
))
res
.
tag
.
kcode
=
kcode
try
:
code
=
self
.
scalar_op
.
c_support_code_apply
(
fake_node
,
'kcode'
)
code
=
self
.
scalar_op
.
c_support_code_apply
(
fake_node
,
nodename
)
if
code
:
raise
SupportCodeError
()
raise
SupportCodeError
(
code
)
except
MethodNotDefined
:
pass
support_code
=
""
try
:
support_code
+
=
self
.
scalar_op
.
c_support_code
()
support_code
=
self
.
scalar_op
.
c_support_code
()
except
MethodNotDefined
:
pass
if
support_code
!=
"#define THEANO_MACRO_MOD(x,y) (x
%
y)"
:
# Avoid the C++ complex struct
raise
SupportCodeError
()
if
(
support_code
.
strip
()
!=
"#define THEANO_MACRO_MOD(x,y) (x
%
y)"
and
support_code
.
strip
()
!=
""
):
# The macro is fine, the C++ struct is not.
raise
SupportCodeError
(
support_code
)
k
=
ElemwiseKernel
(
None
,
inps
+
outs
,
kcode
,
preamble
=
support_code
)
res
.
tag
.
kernel
=
k
scal_out
=
[]
oi
=
0
for
n
in
range
(
len
(
fake_node
.
outputs
)):
if
n
in
self
.
inplace_pattern
:
scal_out
.
append
(
inps
[
self
.
inplace_pattern
[
n
]]
.
name
+
'[i]'
)
else
:
scal_out
.
append
(
outs
[
oi
]
.
name
+
'[i]'
)
oi
+=
1
return
res
kop
=
self
.
scalar_op
.
c_code
(
fake_node
,
nodename
+
'_scalar'
,
[
i
.
name
+
'[i]'
for
i
in
inps
],
scal_out
,
dict
(
fail
=
'return;'
))
# Translate types for scalar composite ops (except complex).
support_code
+=
"""
#define npy_float64 ga_double
#define npy_float32 ga_float
#define npy_uint8 ga_ubyte
#define npy_int8 ga_byte
#define npy_uint16 ga_ushort
#define npy_int16 ga_short
#define npy_uint32 ga_uint
#define npy_int32 ga_int
#define npy_uint64 ga_ulong
#define npy_int64 ga_long
"""
return
ElemwiseKernel
(
None
,
inps
+
outs
,
kop
,
preamble
=
support_code
)
def
c_support_code_apply
(
self
,
node
,
nodename
):
# This is useless by itself, but will serve an eventual c_code
# implementation
k
=
self
.
generate_kernel
(
node
,
nodename
)
nd
=
node
.
inputs
[
0
]
.
type
.
ndim
res
=
[]
for
i
in
range
(
1
,
nd
):
var
=
"static const char
%
s_
%
s[] = "
%
(
nodename
,
str
(
i
))
res
.
append
(
var
+
as_C_string_const
(
k
.
render_basic
(
i
))
+
';'
)
res
.
append
(
"static const gpukernel *
%
s_
%
s_k = NULL;"
%
(
nodename
,
str
(
i
)))
var
=
"static const char
%
s_c[] = "
%
(
nodename
,)
res
.
append
(
var
+
as_C_string_const
(
k
.
contig_src
)
+
';'
)
res
.
append
(
"static const gpukernel *
%
s_c_k = NULL;"
%
(
nodename
,))
return
'
\n
'
.
join
(
res
)
def
perform
(
self
,
node
,
inputs
,
output_storage
):
# Try to reuse the kernel from a previous call to hopefully
# avoid recompiling
if
not
hasattr
(
node
,
'_cache_elemwise_k'
):
node
.
_cache_elemwise_k
=
self
.
generate_kernel
(
node
,
"kcode"
)
out_shape
=
[]
for
values
in
izip
(
*
[
input
.
shape
for
input
in
inputs
]):
if
any
(
v
==
0
for
v
in
values
):
# All non-broadcasted dimensions should be zero
assert
max
(
values
)
<=
1
out_shape
.
append
(
0
)
else
:
out_shape
.
append
(
max
(
values
))
out_shape
=
tuple
(
out_shape
)
def
perform
(
self
,
node
,
inps
,
out
):
k
=
node
.
tag
.
kernel
outs
=
[
ensure_out
(
o
[
0
],
inps
[
0
])
for
o
in
out
]
args
=
copy
.
copy
(
inputs
)
for
n
,
(
stor
,
out
)
in
enumerate
(
izip
(
output_storage
,
node
.
outputs
)):
if
n
in
self
.
inplace_pattern
:
stor
[
0
]
=
inputs
[
self
.
inplace_pattern
[
n
]]
else
:
args
.
append
(
ensure_allocated
(
stor
,
out_shape
,
out
.
type
.
dtype
))
# the dict call is there to avoid syntax error in python <= 2.5
k
(
*
(
inps
+
outs
),
**
dict
(
broadcast
=
True
))
# the dict call is there to avoid a syntax error in python < 2.6
node
.
_cache_elemwise_k
(
*
args
,
**
dict
(
broadcast
=
True
))
if
config
.
gpuarray
.
sync
:
output_storage
[
0
][
0
]
.
sync
()
for
o
,
og
in
zip
(
out
,
outs
):
o
[
0
]
=
og
class
SupportCodeError
(
Exception
):
"""
We do not support certain things (such as the C++ complex struct)
"""
class
GpuDimShuffle
(
HideC
,
DimShuffle
):
def
make_node
(
self
,
input
):
res
=
DimShuffle
.
make_node
(
self
,
input
)
otype
=
GpuArrayType
(
dtype
=
res
.
outputs
[
0
]
.
type
.
dtype
,
broadcastable
=
res
.
outputs
[
0
]
.
type
.
broadcastable
)
input
=
as_gpuarray_variable
(
input
)
return
Apply
(
self
,
[
input
],
[
otype
()])
def
__str__
(
self
):
if
self
.
inplace
:
s
=
"InplaceGpuDimShuffle{
%
s}"
else
:
s
=
"GpuDimShuffle{
%
s}"
return
s
%
(
','
.
join
(
str
(
x
)
for
x
in
self
.
new_order
))
def
perform
(
self
,
node
,
inp
,
out
):
input
,
=
inp
storage
,
=
out
res
=
input
res
=
res
.
transpose
(
self
.
shuffle
+
self
.
drop
)
shape
=
list
(
res
.
shape
[:
len
(
self
.
shuffle
)])
for
augm
in
self
.
augment
:
shape
.
insert
(
augm
,
1
)
res
=
res
.
reshape
(
shape
)
if
not
self
.
inplace
:
res
=
res
.
copy
()
storage
[
0
]
=
res
def
c_support_code_apply
(
self
,
node
,
name
):
def
copy_shape
(
nd_out
):
stmts
=
[]
e
=
0
for
d
in
range
(
nd_out
):
if
d
in
self
.
augment
:
stmts
.
append
(
"sh[
%
s] = 1;"
%
(
d
,))
else
:
stmts
.
append
(
"sh[
%
s] = tmp->ga.dimensions[
%
s];"
%
(
d
,
e
))
e
+=
1
return
'
\n
'
.
join
(
stmts
)
return
"""
static const unsigned int
%(name)
s_ax[] = {
%(shuffle)
s};
static PyGpuArrayObject *
%(name)
s_f(PyGpuArrayObject *a) {
PyGpuArrayObject *res, *tmp;
size_t sh[
%(nd_out)
s];
tmp = pygpu_transpose(a,
%(name)
s_ax);
if (!tmp) return NULL;
%(copy_shape)
s
res = pygpu_reshape(tmp,
%(nd_out)
s, sh, GA_ANY_ORDER, 1, -1);
Py_DECREF(tmp);
return res;
}
"""
%
dict
(
shuffle
=
', '
.
join
(
str
(
a
)
for
a
in
(
self
.
shuffle
+
self
.
drop
)),
name
=
name
,
nd_out
=
len
(
self
.
new_order
),
copy_shape
=
copy_shape
(
len
(
self
.
new_order
)))
def
c_code
(
self
,
node
,
name
,
inputs
,
outputs
,
sub
):
d
=
dict
(
name
=
name
,
fail
=
sub
[
'fail'
],
inp
=
inputs
[
0
],
out
=
outputs
[
0
],
nd
=
len
(
self
.
input_broadcastable
))
process
=
"""
PyGpuArrayObject *tmp = NULL;
if (
%(inp)
s->ga.nd !=
%(nd)
s) {
PyErr_SetString(PyExc_TypeError, "input nd");
%(fail)
s
}
Py_XDECREF(
%(out)
s);
%(out)
s =
%(name)
s_f(
%(inp)
s);
if (
%(out)
s == NULL) {
%(fail)
s}
"""
%
d
if
not
self
.
inplace
:
process
+=
"""
tmp = pygpu_copy(
%(out)
s, GA_ANY_ORDER);
Py_DECREF(
%(out)
s);
if (!tmp) {
%(out)
s = NULL;
%(fail)
s
}
%(out)
s = tmp;
"""
%
d
return
process
def
c_code_cache_version
(
self
):
return
(
3
,)
class
GpuCAReduce
(
HideC
,
CAReduceDtype
):
def
__init__
(
self
,
scalar_op
,
axis
=
None
,
dtype
=
None
,
acc_dtype
=
None
):
if
not
hasattr
(
scalar_op
,
'identity'
):
raise
ValueError
(
"No identity on scalar op"
)
CAReduceDtype
.
__init__
(
self
,
scalar_op
,
axis
=
axis
,
dtype
=
dtype
,
acc_dtype
=
acc_dtype
)
def
__str__
(
self
):
ax
=
''
if
self
.
axis
is
not
None
:
ax
=
'{
%
s}'
%
(
', '
.
join
(
str
(
x
)
for
x
in
self
.
axis
),)
return
"GpuReduce{
%
s}
%
s"
%
(
self
.
scalar_op
,
ax
)
def
make_node
(
self
,
input
):
res
=
CAReduceDtype
.
make_node
(
self
,
input
)
input
=
as_gpuarray_variable
(
input
)
otype
=
GpuArrayType
(
dtype
=
res
.
outputs
[
0
]
.
dtype
,
broadcastable
=
res
.
outputs
[
0
]
.
broadcastable
)
if
res
.
op
.
axis
is
not
None
:
redux
=
[]
for
i
in
range
(
len
(
input
.
type
.
broadcastable
)):
redux
.
append
(
i
in
res
.
op
.
axis
)
# since redux is just another way to describe what is in axis
# it doesn't need to be compared in __eq__ or __hash__
res
.
op
.
redux
=
redux
return
Apply
(
res
.
op
,
[
input
],
[
otype
()])
def
make_thunk
(
self
,
node
,
storage_map
,
compute_map
,
no_recycling
):
if
self
.
axis
is
None
:
redux
=
[
True
]
*
node
.
inputs
[
0
]
.
ndim
else
:
redux
=
self
.
redux
acc_dtype
=
getattr
(
self
,
'acc_dtype'
,
None
)
if
acc_dtype
is
None
:
acc_dtype
=
node
.
outputs
[
0
]
.
type
.
dtype
if
any
(
redux
):
node
.
_cache_reduction_k
=
self
.
generate_kernel
(
node
,
acc_dtype
,
redux
)
return
super
(
GpuCAReduce
,
self
)
.
make_thunk
(
node
,
storage_map
,
compute_map
,
no_recycling
)
def
generate_kernel
(
self
,
node
,
odtype
,
redux
):
if
isinstance
(
self
.
scalar_op
,
scalar
.
basic
.
Add
):
reduce_expr
=
"a + b"
elif
isinstance
(
self
.
scalar_op
,
scalar
.
basic
.
Mul
):
reduce_expr
=
"a * b"
else
:
raise
NotImplementedError
()
return
ReductionKernel
(
pygpu
.
get_default_context
(),
odtype
,
self
.
scalar_op
.
identity
,
reduce_expr
,
redux
,
arguments
=
[
make_argument
(
node
.
inputs
[
0
],
'a'
)],
init_nd
=
node
.
inputs
[
0
]
.
ndim
)
def
perform
(
self
,
node
,
inp
,
out
):
input
,
=
inp
output
,
=
out
if
self
.
axis
is
None
:
redux
=
[
True
]
*
input
.
ndim
else
:
redux
=
self
.
redux
if
any
(
redux
):
output
[
0
]
=
node
.
_cache_reduction_k
(
input
)
.
astype
(
copy
=
False
,
dtype
=
node
.
outputs
[
0
]
.
type
.
dtype
)
else
:
output
[
0
]
=
pygpu
.
gpuarray
.
array
(
input
,
copy
=
True
,
dtype
=
node
.
outputs
[
0
]
.
type
.
dtype
)
theano/sandbox/gpuarray/opt.py
浏览文件 @
8bd900f8
import
theano
,
numpy
from
theano
import
tensor
import
copy
import
theano
import
numpy
from
theano
import
tensor
,
scalar
from
theano.compile
import
optdb
from
theano.gof
import
(
local_optimizer
,
EquilibriumDB
,
SequenceDB
,
ProxyDB
,
Optimizer
,
toolbox
,
DestroyHandler
,
...
...
@@ -8,8 +10,12 @@ from theano.gof import (local_optimizer, EquilibriumDB, SequenceDB, ProxyDB,
from
theano.gof.python25
import
all
,
any
from
theano.sandbox.gpuarray.type
import
GpuArrayType
from
basic_ops
import
host_from_gpu
,
gpu_from_host
,
gpu_alloc
from
elemwise
import
GpuElemwise
,
_is_scalar
from
theano.sandbox.gpuarray.basic_ops
import
(
host_from_gpu
,
gpu_from_host
,
gpu_alloc
,
GpuReshape
)
from
theano.sandbox.gpuarray.elemwise
import
(
GpuElemwise
,
_is_scalar
,
GpuDimShuffle
,
GpuCAReduce
)
from
theano.sandbox.gpuarray.subtensor
import
GpuSubtensor
from
theano.sandbox.gpuarray.blas
import
GpuGemv
,
GpuGemm
gpu_optimizer
=
EquilibriumDB
()
gpu_cut_copies
=
EquilibriumDB
()
...
...
@@ -26,6 +32,7 @@ optdb.register('gpuarray_opt', gpu_seqopt,
optdb
.
__position__
.
get
(
'add_destroy_handler'
,
49.5
)
-
1
,
'gpuarray'
)
def
register_opt
(
*
tags
,
**
kwargs
):
def
f
(
local_opt
):
name
=
(
kwargs
and
kwargs
.
pop
(
'name'
))
or
local_opt
.
__name__
...
...
@@ -35,6 +42,36 @@ def register_opt(*tags, **kwargs):
register_opt
()(
theano
.
tensor
.
opt
.
local_track_shape_i
)
def op_lifter(OP):
    """
    Decorator factory turning a "maker" function into a local optimizer
    that moves nodes whose op is exactly ``OP`` to the GPU.

    OP(..., host_from_gpu(), ...) -> host_from_gpu(GpuOP(...))
    gpu_from_host(OP(inp0, ...)) -> GpuOP(inp0, ...)

    The maker receives the node and returns either a replacement Op
    (applied to the node's inputs), a variable, or a false value to
    decline.  The result is always wrapped in ``host_from_gpu``.
    """
    def f(maker):
        def local_opt(node):
            if type(node.op) is not OP:
                return False

            # This does not support nodes that have more than one output.
            assert len(node.outputs) == 1

            # Lift when at least one input comes from the GPU ...
            should_lift = any([i.owner and i.owner.op == host_from_gpu
                               for i in node.inputs])
            if not should_lift:
                # ... or when every client moves the result to the GPU.
                should_lift = all([c != 'output' and c.op == gpu_from_host
                                   for c, idx in node.outputs[0].clients])
            if not should_lift:
                return False

            new_op = maker(node)
            # The inequality test is needed as sometimes new_op inherits
            # from OP.
            if not (new_op and new_op != node.op):
                return False

            if isinstance(new_op, theano.Op):
                on_gpu = new_op(*node.inputs)
            else:
                # otherwise suppose it is a variable on the GPU
                on_gpu = new_op
            return [host_from_gpu(on_gpu)]

        local_opt.__name__ = maker.__name__
        return local_optimizer([OP])(local_opt)
    return f
class
InputToGpuOptimizer
(
Optimizer
):
"Transfer the input to the gpu to start the rolling wave."
...
...
@@ -63,6 +100,7 @@ class InputToGpuOptimizer(Optimizer):
gpu_seqopt
.
register
(
'InputToGpuArrayOptimizer'
,
InputToGpuOptimizer
(),
0
,
'fast_run'
,
'fast_compile'
,
'merge'
)
@local_optimizer
([])
def
local_cut_gpu_host_gpu
(
node
):
if
tensor
.
opt
.
opt
.
check_chain
(
node
,
gpu_from_host
,
host_from_gpu
):
...
...
@@ -78,67 +116,117 @@ gpu_cut_copies.register('cut_gpua_constant_transfers',
optdb
[
'canonicalize'
]
.
register
(
'local_cut_gpua_host_gpua'
,
local_cut_gpu_host_gpu
,
'fast_run'
,
'gpuarray'
)
@register_opt
()
@
local_optimizer
([
tensor
.
Alloc
]
)
@
op_lifter
(
tensor
.
Alloc
)
def
local_gpualloc
(
node
):
replace
=
False
if
node
.
op
==
tensor
.
alloc
:
if
node
.
inputs
[
0
]
.
owner
and
node
.
inputs
[
0
]
.
owner
.
op
==
host_from_gpu
:
replace
=
True
elif
all
([
c
!=
'output'
and
c
.
op
==
gpu_from_host
for
c
,
idx
in
node
.
outputs
[
0
]
.
clients
]):
replace
=
True
elif
all
([
c
!=
'output'
and
c
.
op
==
tensor
.
join
and
all
([
i
.
owner
and
i
.
owner
.
op
in
[
host_from_gpu
,
tensor
.
alloc
]
for
i
in
c
.
inputs
[
1
:]])
for
c
,
idx
in
node
.
outputs
[
0
]
.
clients
]):
replace
=
True
if
replace
:
val
=
node
.
inputs
[
0
]
shp
=
node
.
inputs
[
1
:]
old_out
=
node
.
outputs
[
0
]
val2
=
tensor
.
shape_padleft
(
val
,
len
(
shp
)
-
val
.
ndim
)
new_out
=
host_from_gpu
(
gpu_alloc
(
val
,
*
shp
))
if
new_out
.
type
!=
old_out
.
type
:
assert
new_out
.
type
.
ndim
==
old_out
.
type
.
ndim
assert
new_out
.
type
.
dtype
==
old_out
.
type
.
dtype
for
b_old
,
b_new
in
zip
(
old_out
.
type
.
broadcastable
,
new_out
.
type
.
broadcastable
):
assert
b_new
or
(
not
b_old
)
new_out
=
tensor
.
patternbroadcast
(
new_out
.
old_out
.
broadcastable
)
return
[
new_out
]
return
gpu_alloc
@register_opt
()
@local_optimizer
([])
@op_lifter(tensor.Reshape)
def local_gpureshape(node):
    """
    Return the ``GpuReshape`` op replacing a host ``Reshape`` node.

    The replacement keeps the number of output dimensions.  When the
    original op is named, the GPU op gets that name prefixed with
    ``'Gpu'`` (the same convention ``local_gpu_elemwise`` uses); an
    unnamed op stays unnamed.
    """
    op = node.op
    name = op.name
    if name:
        name = 'Gpu' + name
    # Pass the prefixed name: the original code computed it and then
    # handed ``op.name`` to the constructor, discarding the prefix.
    return GpuReshape(op.ndim, name)
@register_opt()
@op_lifter(tensor.Flatten)
def local_gpuflatten(node):
    """
    Replace a ``Flatten`` whose ``outdim`` is 1 by a ``GpuReshape`` to
    shape ``[-1]``.  Any other ``outdim`` is left alone (returns None).
    """
    outdim = node.op.outdim
    if outdim != 1:
        return None

    reshape_op = GpuReshape(outdim, None)
    # -1 lets the reshape infer the single output dimension.
    return reshape_op(node.inputs[0], theano.tensor.constant([-1]))
@register_opt
()
@op_lifter
(
tensor
.
Elemwise
)
def
local_gpu_elemwise
(
node
):
do_replace
=
False
gpu_out
=
False
# check for gpu_from_host(Elemwise)) and extract the Elemwise node
if
node
.
op
==
gpu_from_host
:
host_i
,
=
node
.
inputs
if
(
host_i
.
owner
and
isinstance
(
host_i
.
owner
.
op
,
tensor
.
Elemwise
)
and
len
(
host_i
.
clients
)
==
1
):
node
=
host_i
.
owner
do_replace
=
True
gpu_out
=
True
# check for elemwise(..., host_from_gpu, ...)
if
isinstance
(
node
.
op
,
tensor
.
Elemwise
):
if
numpy
.
any
([
i
.
owner
and
i
.
owner
.
op
==
host_from_gpu
for
i
in
node
.
inputs
]):
do_replace
=
True
if
numpy
.
all
([
_is_scalar
(
i
)
for
i
in
node
.
inputs
]):
do_replace
=
False
if
do_replace
:
new_op
=
GpuElemwise
(
node
.
op
.
scalar_op
)
gpu_elemwise
=
new_op
(
*
(
gpu_from_host
(
i
)
for
i
in
node
.
inputs
))
if
gpu_out
:
return
[
gpu_elemwise
]
else
:
return
[
host_from_gpu
(
gpu_elemwise
)]
else
:
return
False
op
=
node
.
op
name
=
op
.
name
if
name
:
name
=
'Gpu'
+
name
res
=
GpuElemwise
(
op
.
scalar_op
,
name
=
name
,
inplace_pattern
=
copy
.
copy
(
op
.
inplace_pattern
),
nfunc_spec
=
op
.
nfunc_spec
)
return
res
def max_inputs_to_GpuElemwise(node):
    """
    Return how many inputs fit in the kernel argument space for ``node``.

    The budget is a fixed 232 bytes (the limit is taken from CUDA for
    now).  Pointers are counted as 8 bytes and integers as 4.  The
    dimensionality is read from the node's first input.
    """
    pointer_bytes = 8
    integer_bytes = 4
    # we take the limit from CUDA for now
    budget = 232

    ndim = node.inputs[0].type.ndim

    # Every array argument costs one pointer plus ``ndim`` integers.
    bytes_per_array = pointer_bytes + ndim * integer_bytes

    # Always present: number of elements and shape, plus every output.
    mandatory = (integer_bytes * (ndim + 1) +
                 bytes_per_array * len(node.outputs))

    return (budget - mandatory) // bytes_per_array
gpu_local_elemwise_fusion
=
tensor
.
opt
.
local_elemwise_fusion_op
(
GpuElemwise
,
max_inputs_to_GpuElemwise
)
optdb
.
register
(
'gpua_elemwise_fusion'
,
tensor
.
opt
.
FusionOptimizer
(
gpu_local_elemwise_fusion
),
71.00
,
'fast_run'
,
'fusion'
,
'local_elemwise_fusion'
,
'gpuarray'
)
inplace_gpu_elemwise_opt
=
tensor
.
opt
.
inplace_elemwise_optimizer_op
(
GpuElemwise
)
optdb
.
register
(
'gpua_inplace_opt'
,
inplace_gpu_elemwise_opt
,
75
,
'inplace_elemwise_optimizer'
,
'fast_run'
,
'inplace'
,
'gpuarray'
)
@register_opt()
@op_lifter(tensor.DimShuffle)
def local_gpua_dimshuffle(node):
    """Return a ``GpuDimShuffle`` built with the host op's
    ``input_broadcastable`` and ``new_order``."""
    host_op = node.op
    return GpuDimShuffle(host_op.input_broadcastable, host_op.new_order)
@register_opt()
@op_lifter(tensor.SpecifyShape)
def local_gpua_specifyShape(node):
    """Return ``tensor.specify_shape`` as the replacement for a
    ``SpecifyShape`` node; ``node`` itself is not inspected."""
    replacement = tensor.specify_shape
    return replacement
@register_opt()
@op_lifter(tensor.Subtensor)
def local_gpua_subtensor(node):
    """Return a ``GpuSubtensor`` using the host op's ``idx_list``."""
    idx_list = node.op.idx_list
    return GpuSubtensor(idx_list)
@register_opt()
@op_lifter(tensor.CAReduce)
def local_gpua_careduce(node):
    """
    Return a ``GpuCAReduce`` for reductions whose scalar op is ``Add`` or
    ``Mul``; any other reduction is left alone (returns None).

    ``dtype`` and ``acc_dtype`` are copied from the host op when it has
    them and default to None otherwise.
    """
    host_op = node.op
    supported = (scalar.basic.Add, scalar.basic.Mul)
    if not isinstance(host_op.scalar_op, supported):
        return None

    return GpuCAReduce(host_op.scalar_op,
                       axis=host_op.axis,
                       dtype=getattr(host_op, 'dtype', None),
                       acc_dtype=getattr(host_op, 'acc_dtype', None))
@register_opt()
@op_lifter(tensor.blas.Gemv)
def local_gpua_gemv(node):
    """Return a ``GpuGemv`` with the same ``inplace`` flag as the host
    ``Gemv`` op."""
    inplace = node.op.inplace
    return GpuGemv(inplace=inplace)
@register_opt()
@op_lifter(tensor.blas_c.CGemv)
def local_gpua_gemv2(node):
    """Return a ``GpuGemv`` with the same ``inplace`` flag as the host
    ``CGemv`` op."""
    inplace = node.op.inplace
    return GpuGemv(inplace=inplace)
@register_opt()
@op_lifter(tensor.blas.Gemm)
def local_gpua_gemm(node):
    """Return a ``GpuGemm`` with the same ``inplace`` flag as the host
    ``Gemm`` op."""
    inplace = node.op.inplace
    return GpuGemm(inplace=inplace)
theano/sandbox/gpuarray/subtensor.py
0 → 100644
浏览文件 @
8bd900f8
import
StringIO
import
numpy
import
theano
from
theano
import
tensor
,
gof
from
theano.tensor.subtensor
import
Subtensor
,
get_idx_list
from
theano.gof.python25
import
all
,
any
try
:
import
pygpu
from
pygpu
import
gpuarray
except
ImportError
:
pass
from
theano.sandbox.gpuarray.type
import
GpuArrayType
from
theano.sandbox.gpuarray.basic_ops
import
as_gpuarray_variable
,
HideC
class GpuSubtensor(HideC, Subtensor):
    """
    Basic (slice / integer) indexing of a GpuArray variable.

    Node construction and index handling are delegated to ``Subtensor``;
    this class swaps in a ``GpuArrayType`` output, indexes the gpuarray in
    ``perform`` and generates C code built on ``pygpu_index``.
    """

    def make_node(self, x, *inputs):
        """
        Build the Apply node.

        ``Subtensor.make_node`` is used to obtain the output dtype and
        broadcast pattern as well as the index inputs; ``x`` is then
        converted with ``as_gpuarray_variable`` and the output is given a
        ``GpuArrayType``.
        """
        rval = tensor.Subtensor.make_node(self, x, *inputs)
        otype = GpuArrayType(dtype=rval.outputs[0].type.dtype,
                             broadcastable=rval.outputs[0].type.broadcastable)
        x = as_gpuarray_variable(x)
        return gof.Apply(self, [x] + rval.inputs[1:], [otype()])

    def perform(self, node, inputs, out_):
        """
        Index ``inputs[0]`` and store the result in ``out_[0][0]``.

        When the only input is the array itself (no index is supplied at
        run time) the index is constant, so it is cached on the op in
        ``perform_cache_cdata`` and reused by later calls.
        NOTE(review): ``perform_cache_cdata`` is never initialised in this
        class; presumably ``Subtensor`` sets it to None -- confirm.
        """
        out, = out_
        x = inputs[0]
        if self.perform_cache_cdata is not None:
            out[0] = x.__getitem__(self.perform_cache_cdata)
            return

        cdata = get_idx_list(inputs, self.idx_list)
        if len(cdata) == 1:
            cdata = cdata[0]
        if len(inputs) == 1:
            self.perform_cache_cdata = cdata

        out[0] = x.__getitem__(cdata)

    def c_support_code(self):
        """
        Return the C helper ``fix_indices`` used by ``c_code``.

        The helper fills in omitted slice members (the ``*_n`` flags mark
        which of start/stop/step were omitted), rejects a zero step and
        clamps start/stop against ``len``.
        """
        # ``len`` is unsigned.  A start/stop that was clamped to -1 (the
        # "before the first element" marker for negative steps) must not
        # be compared against it directly: the usual arithmetic
        # conversions would turn -1 into a huge unsigned value and the
        # bound would wrongly be reset to len-1.  Hence the explicit sign
        # test before the upper-bound comparison.
        return """
        static int fix_indices(ssize_t *start, ssize_t *stop, ssize_t *step,
                               int start_n, int stop_n, int step_n,
                               size_t len) {
            if (step_n) *step = 1;
            if (*step == 0) {
                PyErr_SetString(PyExc_ValueError, "slice step cannot be zero");
                return -1;
            }
            if (start_n) *start = (*step < 0) ? len-1 : 0;
            else {
                if (*start < 0) *start += len;
                if (*start < 0) *start = (*step < 0) ? -1 : 0;
                if (*start >= 0 && (size_t)*start >= len)
                    *start = (*step < 0) ? len-1 : len;
            }

            if (stop_n) *stop = (*step < 0) ? -1 : len;
            else {
                if (*stop < 0) *stop += len;
                if (*stop < 0) *stop = (*step < 0) ? -1 : 0;
                if (*stop >= 0 && (size_t)*stop >= len)
                    *stop = (*step < 0) ? len-1 : len;
            }
            if (*stop < *start && *step > 0)
                *stop = *start;
            return 0;
        }
        """

    def c_code(self, node, name, inputs, outputs, sub):
        """
        Generate the C code performing the indexing.

        For every dimension the code fills ``starts``/``stops``/``steps``
        and then calls ``pygpu_index``.  Slices go through
        ``fix_indices``; a scalar index is written with a step of 0.
        Indices that are run-time inputs are consumed, in order, from
        ``inputs[1:]``.
        """
        inp_ndim = node.inputs[0].ndim
        inp = inputs[0]
        indices = inputs[1:]

        # pad out the index list to the same dimension as the input
        idx_list = self.idx_list + \
            ((slice(None),) * (inp_ndim - len(self.idx_list)))

        # This case fails when we use pygpu_index(), so here is some
        # special code
        if len(idx_list) == 0:
            return """
        Py_XDECREF(%(out)s);
        %(out)s = pygpu_copy(%(inp)s, GA_ANY_ORDER);
        if (!%(out)s) { %(fail)s }
""" % dict(out=outputs[0], inp=inp, fail=sub['fail'])

        sio = StringIO.StringIO()
        print >> sio, """
        ssize_t starts[%(sz)s];
        ssize_t stops[%(sz)s];
        ssize_t steps[%(sz)s];
        ssize_t cur;
        int err;

        if (%(inp)s->ga.nd != %(sz)s) {
            PyErr_SetString(PyExc_IndexError, "invalid index");
            %(fail)s
        }
        """ % dict(sz=len(idx_list), inp=inp, fail=sub['fail'])

        def fix_idx(idx):
            # Returns (C expression, flag); the flag is 1 when the slice
            # member was omitted so that fix_indices supplies the default.
            if idx is None:
                return "0", 1
            elif isinstance(idx, (numpy.integer, int)):
                return str(idx), 0
            elif isinstance(idx, gof.Type):
                # A Type entry stands for the next run-time index input.
                return indices.pop(0), 0
            else:
                assert 0, idx

        for i, idx in enumerate(idx_list):
            if isinstance(idx, slice):
                start, start_n = fix_idx(idx.start)
                stop, stop_n = fix_idx(idx.stop)
                step, step_n = fix_idx(idx.step)
                print >>sio, """
                starts[%(i)s] = %(start)s;
                stops[%(i)s] = %(stop)s;
                steps[%(i)s] = %(step)s;
                if (fix_indices(&starts[%(i)s], &stops[%(i)s], &steps[%(i)s],
                                %(start_n)s, %(stop_n)s, %(step_n)s,
                                %(inp)s->ga.dimensions[%(i)s]) == -1) {
                    %(fail)s
                }
                """ % dict(i=i, start=start, stop=stop, step=step,
                           start_n=start_n, stop_n=stop_n, step_n=step_n,
                           fail=sub['fail'], inp=inp)
            else:
                if isinstance(idx, gof.Type):
                    start = indices.pop(0)
                elif isinstance(idx, (numpy.integer, int)):
                    start = idx
                else:
                    assert 0, idx
                # Scalar index: negative values wrap around once and the
                # step is set to 0.
                print >>sio, """
                cur = %(start)s;
                if (cur < 0)
                    cur += %(inp)s->ga.dimensions[%(i)s];
                starts[%(i)s] = cur;
                steps[%(i)s] = 0;
                """ % dict(i=i, start=start, fail=sub['fail'], inp=inp)

        print >>sio, """
        Py_XDECREF(%(out)s);
        %(out)s = pygpu_index(%(inp)s, starts, stops, steps);
        if (!%(out)s) { %(fail)s }
        """ % dict(name=name, fail=sub['fail'], inp=inp, out=outputs[0])

        return sio.getvalue()

    def c_code_cache_version(self):
        """Version of the generated C code."""
        # Bumped from 5 because fix_indices changed.
        return (6,)
theano/sandbox/gpuarray/tests/test_basic_ops.py
浏览文件 @
8bd900f8
...
...
@@ -5,8 +5,9 @@ from copy import copy, deepcopy
import
numpy
import
theano
import
theano.tensor
as
T
from
theano.compile
import
DeepCopyOp
from
theano.tensor.tests.test_basic
import
safe_make_node
from
theano.tensor
import
TensorType
from
theano.tensor.basic
import
alloc
from
theano.tensor.tests.test_basic
import
rand
,
safe_make_node
,
T_reshape
from
theano.tests.unittest_tools
import
SkipTest
from
numpy.testing.noseclasses
import
KnownFailureTest
...
...
@@ -33,7 +34,8 @@ from theano.sandbox.gpuarray.type import (GpuArrayType,
gpuarray_shared_constructor
)
from
theano.sandbox.gpuarray.basic_ops
import
(
host_from_gpu
,
gpu_from_host
,
gpu_alloc
,
gpu_from_cuda
,
cuda_from_gpu
)
cuda_from_gpu
,
HostFromGpu
,
GpuFromHost
,
GpuReshape
)
from
theano.tests
import
unittest_tools
as
utt
utt
.
seed_rng
()
...
...
@@ -42,11 +44,10 @@ rng = numpy.random.RandomState(seed=utt.fetch_seed())
from
pygpu
import
gpuarray
if
theano
.
config
.
mode
==
'FAST_COMPILE'
:
mode_with_gpu
=
theano
.
compile
.
mode
.
get_mode
(
'FAST_RUN'
)
.
including
(
'gpuarray'
)
mode_without_gpu
=
theano
.
compile
.
mode
.
get_mode
(
'FAST_RUN'
)
.
excluding
(
'gpuarray'
\
)
mode_with_gpu
=
theano
.
compile
.
mode
.
get_mode
(
'FAST_RUN'
)
.
including
(
'gpuarray'
)
.
excluding
(
'gpu'
)
mode_without_gpu
=
theano
.
compile
.
mode
.
get_mode
(
'FAST_RUN'
)
.
excluding
(
'gpuarray'
)
else
:
mode_with_gpu
=
theano
.
compile
.
mode
.
get_default_mode
()
.
including
(
'gpuarray'
)
mode_with_gpu
=
theano
.
compile
.
mode
.
get_default_mode
()
.
including
(
'gpuarray'
)
.
excluding
(
'gpu'
)
mode_without_gpu
=
theano
.
compile
.
mode
.
get_default_mode
()
.
excluding
(
'gpuarray'
)
...
...
@@ -88,139 +89,110 @@ def fake_shared(value, name=None, strict=False, allow_downcast=None, **kwargs):
def rand_gpuarray(*shape, **kwargs):
    """
    Return a random GpuArray of the given shape with values in [-1, 1).

    Keyword arguments:

    dtype -- dtype of the result (defaults to ``theano.config.floatX``)
    cls -- forwarded to ``gpuarray.array`` (defaults to None)

    Any other keyword argument raises ``TypeError``.
    """
    r = rng.rand(*shape) * 2 - 1
    dtype = kwargs.pop('dtype', theano.config.floatX)
    cls = kwargs.pop('cls', None)
    if len(kwargs) != 0:
        # Format the message (it used to be passed as a second exception
        # argument) and avoid indexing ``keys()``, which only works on
        # Python 2.
        raise TypeError('Unexpected argument %s' % (list(kwargs)[0],))
    # A single return: ``cls`` used to be popped but never forwarded
    # because an earlier return without it came first.
    return gpuarray.array(r, dtype=dtype, cls=cls)
def
makeTester
(
name
,
op
,
expected
,
good
=
None
,
bad_build
=
None
,
checks
=
None
,
bad_runtime
=
None
,
mode
=
None
,
skip
=
False
,
eps
=
1e-10
):
if
good
is
None
:
good
=
{}
if
bad_build
is
None
:
bad_build
=
{}
if
bad_runtime
is
None
:
bad_runtime
=
{}
def
makeTester
(
name
,
op
,
gpu_op
,
cases
,
checks
=
None
,
mode_gpu
=
mode_with_gpu
,
mode_nogpu
=
mode_without_gpu
,
skip
=
False
,
eps
=
1e-10
):
if
checks
is
None
:
checks
=
{}
_op
=
op
_expected
=
expected
_good
=
good
_bad_build
=
bad_build
_bad_runtime
=
bad_runtime
_gpu_op
=
gpu_op
_cases
=
cases
_skip
=
skip
_checks
=
checks
class
Checker
(
unittest
.
TestCase
):
class
Checker
(
unittest
.
TestCase
,
utt
.
TestOptimizationMixin
):
op
=
staticmethod
(
_op
)
expected
=
staticmethod
(
_expected
)
good
=
_good
bad_build
=
_bad_build
bad_runtime
=
_bad_runtime
gpu_op
=
staticmethod
(
_gpu_op
)
cases
=
_cases
skip
=
_skip
checks
=
_checks
def
setUp
(
self
):
eval
(
self
.
__class__
.
__module__
+
'.'
+
self
.
__class__
.
__name__
)
def
test_
good
(
self
):
def
test_
all
(
self
):
if
skip
:
raise
SkipTest
(
skip
)
for
testname
,
inputs
in
good
.
items
():
inputs
=
[
copy
(
input
)
for
input
in
inputs
]
inputrs
=
[
fake_shared
(
input
)
for
input
in
inputs
]
for
testname
,
inputs
in
cases
.
items
():
self
.
run_case
(
testname
,
inputs
)
try
:
node
=
safe_make_node
(
self
.
op
,
*
inputrs
)
except
Exception
,
exc
:
err_msg
=
(
"Test
%
s::
%
s: Error occured while making "
"a node with inputs
%
s"
)
%
(
self
.
op
,
testname
,
inputs
)
exc
.
args
+=
(
err_msg
,)
raise
def
run_case
(
self
,
testname
,
inputs
):
inputs_ref
=
[
theano
.
shared
(
inp
)
for
inp
in
inputs
]
inputs_tst
=
[
theano
.
shared
(
inp
)
for
inp
in
inputs
]
try
:
f
=
inplace_func
([],
node
.
outputs
,
mode
=
mode
,
name
=
'test_good'
)
except
Exception
,
exc
:
err_msg
=
(
"Test
%
s::
%
s: Error occured while trying to "
"make a Function"
)
%
(
self
.
op
,
testname
)
exc
.
args
+=
(
err_msg
,)
raise
try
:
node_ref
=
safe_make_node
(
self
.
op
,
*
inputs_ref
)
node_tst
=
safe_make_node
(
self
.
op
,
*
inputs_tst
)
except
Exception
,
exc
:
err_msg
=
(
"Test
%
s::
%
s: Error occured while making "
"a node with inputs
%
s"
)
%
(
self
.
gpu_op
,
testname
,
inputs
)
exc
.
args
+=
(
err_msg
,)
raise
if
isinstance
(
self
.
expected
,
dict
)
and
\
testname
in
self
.
expected
:
expecteds
=
self
.
expected
[
testname
]
else
:
expecteds
=
self
.
expected
(
*
inputs
)
if
not
isinstance
(
expecteds
,
(
list
,
tuple
)):
expecteds
=
(
expecteds
,)
try
:
variables
=
f
()
except
Exception
,
exc
:
err_msg
=
(
"Test
%
s::
%
s: Error occured while calling "
"the Function on the inputs
%
s"
)
%
(
self
.
op
,
testname
,
inputs
)
exc
.
args
+=
(
err_msg
,)
raise
try
:
f_ref
=
inplace_func
([],
node_ref
.
outputs
,
mode
=
mode_nogpu
)
f_tst
=
inplace_func
([],
node_tst
.
outputs
,
mode
=
mode_gpu
)
except
Exception
,
exc
:
err_msg
=
(
"Test
%
s::
%
s: Error occured while trying to "
"make a Function"
)
%
(
self
.
gpu_op
,
testname
)
exc
.
args
+=
(
err_msg
,)
raise
for
i
,
(
variable
,
expected
)
in
\
enumerate
(
izip
(
variables
,
expecteds
)):
if
variable
.
dtype
!=
expected
.
dtype
or
\
variable
.
shape
!=
expected
.
shape
or
\
not
GpuArrayType
.
values_eq_approx
(
variable
,
expected
):
self
.
fail
((
"Test
%
s::
%
s: Output
%
s gave the wrong "
"value. With inputs
%
s, expected
%
s "
"(dtype
%
s), got
%
s (dtype
%
s)."
)
%
(
self
.
op
,
testname
,
i
,
inputs
,
expected
,
expected
.
dtype
,
variable
,
variable
.
dtype
))
for
description
,
check
in
self
.
checks
.
items
():
if
not
check
(
inputs
,
variables
):
self
.
fail
((
"Test
%
s::
%
s: Failed check:
%
s "
"(inputs were
%
s, ouputs were
%
s)"
)
%
(
self
.
op
,
testname
,
description
,
inputs
,
variables
))
def
test_bad_build
(
self
):
if
skip
:
raise
SkipTest
(
skip
)
for
testname
,
inputs
in
self
.
bad_build
.
items
():
inputs
=
[
copy
(
input
)
for
input
in
inputs
]
inputrs
=
[
fake_shared
(
input
)
for
input
in
inputs
]
self
.
assertRaises
(
Exception
,
safe_make_node
,
self
.
op
,
*
inputrs
)
self
.
assertFunctionContains1
(
f_tst
,
self
.
gpu_op
)
def
test_bad_runtime
(
self
):
if
skip
:
raise
SkipTest
(
skip
)
for
testname
,
inputs
in
self
.
bad_runtime
.
items
():
inputrs
=
[
fake_shared
(
input
)
for
input
in
inputs
]
try
:
node
=
safe_make_node
(
self
.
op
,
*
inputrs
)
except
Exception
,
exc
:
err_msg
=
(
"Test
%
s::
%
s: Error occured while trying to "
"make a node with inputs
%
s"
)
%
(
self
.
op
,
testname
,
inputs
)
exc
.
args
+=
(
err_msg
,)
raise
ref_e
=
None
try
:
expecteds
=
f_ref
()
except
Exception
,
exc
:
ref_e
=
exc
try
:
f
=
inplace_func
([],
node
.
outputs
,
mode
=
mode
,
name
=
"test_bad_runtime"
)
except
Exception
,
exc
:
err_msg
=
(
"Test
%
s::
%
s:
Error occured while trying to
"
"
make a Function"
)
%
(
self
.
op
,
testname
)
try
:
variables
=
f_tst
()
except
Exception
,
exc
:
if
ref_e
is
None
:
err_msg
=
(
"Test
%
s::
%
s:
exception when calling the
"
"
Function"
)
%
(
self
.
gpu_
op
,
testname
)
exc
.
args
+=
(
err_msg
,)
raise
self
.
assertRaises
(
Exception
,
f
,
[])
else
:
# if we raised an exception of the same type we're good.
if
isinstance
(
exc
,
type
(
ref_e
)):
return
else
:
err_msg
=
(
"Test
%
s::
%
s: exception raised during test "
"call was not the same as the reference "
"call (got:
%
s, expected
%
s)"
)
%
\
(
self
.
gpu_op
,
testname
,
type
(
exc
),
type
(
ref_e
))
exc
.
args
+=
(
err_msg
,)
raise
for
i
,
(
variable
,
expected
)
in
\
enumerate
(
izip
(
variables
,
expecteds
)):
if
variable
.
dtype
!=
expected
.
dtype
or
\
variable
.
shape
!=
expected
.
shape
or
\
not
TensorType
.
values_eq_approx
(
variable
,
expected
):
self
.
fail
((
"Test
%
s::
%
s: Output
%
s gave the wrong "
"value. With inputs
%
s, expected
%
s "
"(dtype
%
s), got
%
s (dtype
%
s)."
)
%
(
self
.
op
,
testname
,
i
,
inputs
,
expected
,
expected
.
dtype
,
variable
,
variable
.
dtype
))
for
description
,
check
in
self
.
checks
.
items
():
if
not
check
(
inputs
,
variables
):
self
.
fail
((
"Test
%
s::
%
s: Failed check:
%
s "
"(inputs were
%
s, ouputs were
%
s)"
)
%
(
self
.
op
,
testname
,
description
,
inputs
,
variables
))
Checker
.
__name__
=
name
return
Checker
...
...
@@ -300,31 +272,37 @@ def gpu_alloc_expected(x, *shp):
GpuAllocTester
=
makeTester
(
name
=
"GpuAllocTester"
,
op
=
gpu_alloc
,
expected
=
gpu_alloc_expected
,
good
=
dict
(
correct01
=
(
rand_gpuarray
(),
numpy
.
int32
(
7
)),
correct01_bcast
=
(
rand_gpuarray
(
1
),
numpy
.
int32
(
7
)),
correct02
=
(
rand_gpuarray
(),
numpy
.
int32
(
4
),
numpy
.
int32
(
7
)),
correct12
=
(
rand_gpuarray
(
7
),
numpy
.
int32
(
4
),
numpy
.
int32
(
7
)),
correct13
=
(
rand_gpuarray
(
7
),
numpy
.
int32
(
2
),
numpy
.
int32
(
4
),
op
=
alloc
,
gpu_op
=
gpu_alloc
,
cases
=
dict
(
correct01
=
(
rand
(),
numpy
.
int32
(
7
)),
# just gives a DeepCopyOp with possibly wrong results on the CPU
# correct01_bcast=(rand(1), numpy.int32(7)),
correct02
=
(
rand
(),
numpy
.
int32
(
4
),
numpy
.
int32
(
7
)),
correct12
=
(
rand
(
7
),
numpy
.
int32
(
4
),
numpy
.
int32
(
7
)),
correct13
=
(
rand
(
7
),
numpy
.
int32
(
2
),
numpy
.
int32
(
4
),
numpy
.
int32
(
7
)),
correct23
=
(
rand
(
4
,
7
),
numpy
.
int32
(
2
),
numpy
.
int32
(
4
),
numpy
.
int32
(
7
)),
correct23
=
(
rand_gpuarray
(
4
,
7
),
numpy
.
int32
(
2
),
numpy
.
int32
(
4
),
numpy
.
int32
(
7
))
),
bad_runtime
=
dict
(
bad_shape12
=
(
rand_gpuarray
(
7
),
numpy
.
int32
(
7
),
numpy
.
int32
(
5
)),
bad_shape12
=
(
rand
(
7
),
numpy
.
int32
(
7
),
numpy
.
int32
(
5
)),
)
)
def test_deep_copy():
    """The identity function on a GpuArray input compiles to a graph
    starting with a ``DeepCopyOp`` and returns a value equal to its
    input."""
    value = rand_gpuarray(20, dtype='float32')
    vec_type = GpuArrayType(dtype='float32', broadcastable=(False,))
    g = vec_type('g')

    identity = theano.function([g], g)

    first_node = identity.maker.fgraph.toposort()[0]
    assert isinstance(first_node.op, DeepCopyOp)

    result = identity(value)

    assert GpuArrayType.values_eq(result, value)
class G_reshape(T_reshape):
    """Run the ``T_reshape`` test-suite against ``GpuReshape``."""

    def shortDescription(self):
        return None

    def __init__(self, name):
        # Op classes handed to the parent as ``ignore_topo``.
        ignored_ops = (HostFromGpu, GpuFromHost,
                       theano.compile.DeepCopyOp,
                       theano.sandbox.gpuarray.elemwise.GpuElemwise,
                       theano.tensor.opt.Shape_i,
                       theano.tensor.opt.MakeVector)
        # dtype is intentionally not forced to 'float32' here (that
        # option, meant to avoid errors with limited devices, is left
        # disabled).
        T_reshape.__init__(self, name,
                           shared=gpuarray_shared_constructor,
                           op=GpuReshape,
                           mode=mode_with_gpu,
                           ignore_topo=ignored_ops)
        assert self.op == GpuReshape
theano/sandbox/gpuarray/tests/test_blas.py
0 → 100644
浏览文件 @
8bd900f8
from
unittest
import
TestCase
from
theano.tensor.blas
import
gemv_inplace
,
gemm_inplace
from
theano.sandbox.gpuarray.tests.test_basic_ops
import
makeTester
,
rand
from
theano.sandbox.gpuarray.blas
import
(
gpugemv_inplace
,
gpugemm_inplace
)
# Compares gemv_inplace on the host with gpugemv_inplace on the GPU.
# Each case is the list of arguments given to the op.
GpuGemvTester = makeTester(
    'GpuGemvTester',
    op=gemv_inplace,
    gpu_op=gpugemv_inplace,
    cases={
        'dot_vv': [rand(1), 1, rand(1, 2), rand(2), 0],
        'dot_vm': [rand(3), 1, rand(3, 2), rand(2), 0],
        # Disabled cases with empty dimensions:
        # 'test_02': [rand(0), 1, rand(0, 2), rand(2), 0],
        # 'test_30': [rand(3), 1, rand(3, 0), rand(0), 0],
        # 'test_00': [rand(0), 1, rand(0, 0), rand(0), 0],
        'test_stride': [rand(3)[::-1], 1, rand(3, 2)[::-1],
                        rand(2)[::-1], 0],
    })
# Compares gemm_inplace on the host with gpugemm_inplace on the GPU.
# Every case uses freshly drawn (3, 4), (3, 5) and (5, 4) matrices; only
# the two scalar arguments differ.  The pairs below are listed in case
# order (test1 .. test9).
GpuGemmTester = makeTester(
    'GpuGemmTester',
    op=gemm_inplace,
    gpu_op=gpugemm_inplace,
    cases=dict(
        ('test%d' % (num + 1),
         [rand(3, 4), first, rand(3, 5), rand(5, 4), second])
        for num, (first, second) in enumerate([
            (1.0, 0.0), (1.0, 1.0), (1.0, -1.0),
            (0.0, 0.0), (0.0, 0.6), (0.0, -1.0),
            (-1.0, 0.0), (-1.0, 1.0), (-1.0, -1.0),
        ])))
theano/sandbox/gpuarray/tests/test_elemwise.py
0 → 100644
浏览文件 @
8bd900f8
import
unittest
from
theano
import
scalar
,
gof
from
theano.gof
import
FunctionGraph
from
theano.gof.python25
import
all
,
any
from
theano.tests.unittest_tools
import
SkipTest
from
theano.tensor.tests.test_elemwise
import
(
test_Broadcast
,
test_DimShuffle
,
test_CAReduce
)
from
theano.sandbox.gpuarray.tests.test_basic_ops
import
rand_gpuarray
from
theano.sandbox.gpuarray.elemwise
import
(
GpuElemwise
,
GpuDimShuffle
,
GpuCAReduce
)
from
theano.sandbox.gpuarray.type
import
GpuArrayType
from
pygpu.array
import
gpuarray
# This is actually a test for GpuElemwise
class test_gpu_Broadcast(test_Broadcast):
    """Run the ``test_Broadcast`` suite with ``GpuElemwise`` and
    ``GpuArrayType``."""
    op = GpuElemwise
    type = GpuArrayType

    def rand_val(self, shp):
        """Return a random GpuArray of shape ``shp``."""
        options = {'cls': gpuarray}
        return rand_gpuarray(*shp, **options)

    # no c_code() yet, so the C variants stay disabled:
    # cop = GpuElemwise
    # ctype = GpuArrayType
    #
    # def rand_cval(self, shp):
    #     return rand_gpuarray(*shp, **dict(cls=gpuarray))
class test_GpuDimShuffle(test_DimShuffle):
    """Run the ``test_DimShuffle`` suite with ``GpuDimShuffle``."""
    op = GpuDimShuffle
class test_GpuCAReduce(test_CAReduce):
    """Run the ``test_CAReduce`` suite with ``GpuCAReduce``, restricted to
    the add and mul reductions and to the Python (perform) linker."""
    dtypes = ["float32"]
    bin_dtypes = ["uint8", "int8"]
    op = GpuCAReduce
    reds = [scalar.add, scalar.mul]

    def test_perform(self):
        """Every reduction on every listed dtype."""
        every_dtype = self.dtypes + self.bin_dtypes
        for dtype in every_dtype:
            for reduction in self.reds:
                linker = gof.PerformLinker()
                self.with_linker(linker, reduction, dtype=dtype)

    def test_perform_nan(self):
        """Every reduction on ``dtypes`` only, with ``test_nan=True``."""
        for dtype in self.dtypes:
            for reduction in self.reds:
                linker = gof.PerformLinker()
                self.with_linker(linker, reduction, dtype=dtype,
                                 test_nan=True)

    def test_c(self):
        raise SkipTest("no C code")

    def test_c_nan(self):
        raise SkipTest("no C code")
theano/sandbox/gpuarray/tests/test_opt.py
0 → 100644
浏览文件 @
8bd900f8
import
numpy
import
theano
from
theano.tests
import
unittest_tools
as
utt
from
theano.sandbox.gpuarray.basic_ops
import
GpuReshape
import
theano.sandbox.gpuarray
# Module set-up: skip every test when pygpu is missing or cannot be
# activated, then build the compilation modes used by the tests.
#
# SkipTest is reached through ``utt`` (theano.tests.unittest_tools, which
# other test modules import SkipTest from): the bare name is not imported
# in this module, so raising it would fail with a NameError instead of
# skipping.
if theano.sandbox.gpuarray.pygpu is None:
    raise utt.SkipTest("pygpu not installed")

import theano.sandbox.cuda as cuda_ndarray
if (cuda_ndarray.cuda_available and
        not theano.sandbox.gpuarray.pygpu_activated):
    if not cuda_ndarray.use.device_number:
        cuda_ndarray.use('gpu')
    theano.sandbox.gpuarray.init_dev('cuda')

if not theano.sandbox.gpuarray.pygpu_activated:
    raise utt.SkipTest("pygpu disabled")

# FAST_COMPILE is replaced by FAST_RUN; otherwise the default mode is
# used.  The GPU mode includes 'gpuarray' and excludes 'gpu'; the
# reference mode excludes 'gpuarray'.
if theano.config.mode == 'FAST_COMPILE':
    mode_with_gpu = theano.compile.mode.get_mode(
        'FAST_RUN').including('gpuarray').excluding('gpu')
    mode_without_gpu = theano.compile.mode.get_mode(
        'FAST_RUN').excluding('gpuarray')
else:
    mode_with_gpu = theano.compile.mode.get_default_mode().including(
        'gpuarray').excluding('gpu')
    mode_without_gpu = theano.compile.mode.get_default_mode().excluding(
        'gpuarray')
def test_flatten():
    """Flattening a float32 matrix under the GPU mode gives the same
    values and shape as numpy and puts a ``GpuReshape`` in the graph."""
    m = theano.tensor.fmatrix()
    flatten_fn = theano.function([m], m.flatten(), mode=mode_with_gpu)

    val = numpy.random.rand(10, 11).astype("float32")
    res = flatten_fn(val)

    utt.assert_allclose(res, val.flatten())
    assert res.shape == val.flatten().shape

    op_types = [type(node.op)
                for node in flatten_fn.maker.fgraph.toposort()]
    assert GpuReshape in op_types
\ No newline at end of file
theano/sandbox/gpuarray/tests/test_subtensor.py
0 → 100644
浏览文件 @
8bd900f8
from
theano.tensor.tests.test_subtensor
import
T_subtensor
from
theano.sandbox.gpuarray.basic_ops
import
(
HostFromGpu
,
GpuFromHost
)
from
theano.sandbox.gpuarray.subtensor
import
GpuSubtensor
from
theano.sandbox.gpuarray.type
import
gpuarray_shared_constructor
from
theano.sandbox.gpuarray.tests.test_basic_ops
import
mode_with_gpu
from
theano.compile
import
DeepCopyOp
from
theano
import
tensor
class G_subtensor(T_subtensor):
    """Run the ``T_subtensor`` test-suite against ``GpuSubtensor``."""

    def shortDescription(self):
        return None

    def __init__(self, name):
        # Op classes handed to the parent as ``ignore_topo``.
        ignored_ops = (HostFromGpu, GpuFromHost, DeepCopyOp)
        T_subtensor.__init__(self, name,
                             shared=gpuarray_shared_constructor,
                             sub=GpuSubtensor,
                             mode=mode_with_gpu,
                             # avoid errors with limited devices
                             dtype='float32',
                             ignore_topo=ignored_ops)
        assert self.sub == GpuSubtensor
theano/sandbox/gpuarray/tests/test_type.py
0 → 100644
浏览文件 @
8bd900f8
import
operator
import
theano
from
theano.compile
import
DeepCopyOp
from
theano.sandbox.gpuarray.tests.test_basic_ops
import
rand_gpuarray
from
theano.sandbox.gpuarray.type
import
GpuArrayType
def test_deep_copy():
    """The identity function on a GpuArray input compiles to a graph
    starting with a ``DeepCopyOp`` and returns a value equal to its
    input."""
    value = rand_gpuarray(20, dtype='float32')
    vec_type = GpuArrayType(dtype='float32', broadcastable=(False,))
    g = vec_type('g')

    identity = theano.function([g], g)

    first_node = identity.maker.fgraph.toposort()[0]
    assert isinstance(first_node.op, DeepCopyOp)

    result = identity(value)

    assert GpuArrayType.values_eq(result, value)
theano/sandbox/gpuarray/type.py
浏览文件 @
8bd900f8
import
numpy
import
theano
from
theano.tensor.var
import
_tensor_py_operators
from
theano
import
Type
,
Variable
,
Constant
,
tensor
,
config
,
scalar
from
theano.compile
import
SharedVariable
...
...
@@ -26,7 +27,10 @@ class GpuArrayType(Type):
except
gpuarray
.
GpuArrayException
:
raise
TypeError
(
"Unsupported dtype for
%
s:
%
s"
%
(
self
.
__class__
.
__name__
,
self
.
dtype
))
def
__str__
(
self
):
return
"GpuArrayType(
%
s,
%
s)"
%
(
self
.
dtype
,
self
.
broadcastable
)
def
filter
(
self
,
data
,
strict
=
False
,
allow_downcast
=
None
):
if
strict
:
if
not
isinstance
(
data
,
gpuarray
.
GpuArray
):
...
...
@@ -103,8 +107,8 @@ class GpuArrayType(Type):
return
GpuArrayType
.
values_eq
(
a
,
b
)
else
:
res
=
elemwise2
(
a
,
''
,
b
,
a
,
odtype
=
numpy
.
dtype
(
'bool'
),
op_tmpl
=
"res[i] = ((
%(a)
s -
%(b)
s) <"
\
"(1e-8 + 1e-5 * fabs(
%(b)
s)))"
)
op_tmpl
=
"res[i] = ((
%(a)
s -
%(b)
s) <"
"(1e-8 + 1e-5 * fabs(
%(b)
s)))"
)
return
numpy
.
asarray
(
res
)
.
all
()
def
value_zeros
(
self
,
shape
):
...
...
@@ -134,7 +138,7 @@ class GpuArrayType(Type):
return
numpy
.
dtype
(
self
.
dtype
)
.
itemsize
def
c_declare
(
self
,
name
,
sub
):
return
"GpuArrayObject *
%
s;"
%
(
name
,)
return
"
Py
GpuArrayObject *
%
s;"
%
(
name
,)
def
c_init
(
self
,
name
,
sub
):
return
"
%
s = NULL;"
%
(
name
,)
...
...
@@ -149,17 +153,17 @@ class GpuArrayType(Type):
}
/* First check if we are the base type exactly (the most common case),
then do the full subclass check if needed. */
if (py_
%(name)
s->ob_type != &GpuArrayType &&
!PyObject_TypeCheck(py_
%(name)
s, &GpuArrayType)) {
if (py_
%(name)
s->ob_type != &
Py
GpuArrayType &&
!PyObject_TypeCheck(py_
%(name)
s, &
Py
GpuArrayType)) {
PyErr_SetString(PyExc_ValueError, "expected a GpuArray");
%(fail)
s
}
%(name)
s = (GpuArrayObject *)py_
%(name)
s;
%(name)
s = (
Py
GpuArrayObject *)py_
%(name)
s;
Py_INCREF(
%(name)
s);
"""
%
{
'name'
:
name
,
'fail'
:
sub
[
'fail'
]}
def
c_cleanup
(
self
,
name
,
sub
):
return
"Py_XDECREF(
%(name)
s);
%(name)
s = NULL;"
%
{
'name'
:
name
}
return
"Py_XDECREF(
%(name)
s);
%(name)
s = NULL;"
%
{
'name'
:
name
}
def
c_sync
(
self
,
name
,
sub
):
return
"""
...
...
@@ -184,7 +188,8 @@ class GpuArrayType(Type):
# We need arrayobject for the PyArrayDescr struct def
# (even if we just use a pointer to it in a function def)
return
[
'<compyte/array.h>'
,
'<compyte/kernel.h>'
,
'<compyte/error.h>'
,
'<numpy/arrayobject.h>'
,
'<gpuarray_api.h>'
]
'<compyte/buffer_blas.h>'
,
'<numpy/arrayobject.h>'
,
'<gpuarray_api.h>'
]
def
c_header_dirs
(
self
):
return
[
pygpu
.
get_include
(),
numpy
.
get_include
()]
...
...
@@ -193,10 +198,13 @@ class GpuArrayType(Type):
return
[
'compyte'
]
def
c_code_cache_version
(
self
):
return
(
1
,)
ver
=
pygpu
.
gpuarray
.
api_version
()
# we only use the major version since the minor revision are
# API-compatible.
return
(
1
,
ver
[
0
])
class
_operators
(
tensor
.
basic
.
_tensor_py_operators
):
class
_operators
(
_tensor_py_operators
):
def
_as_TensorVariable
(
self
):
from
basic_ops
import
host_from_gpu
return
host_from_gpu
(
self
)
...
...
@@ -204,10 +212,6 @@ class _operators(tensor.basic._tensor_py_operators):
def
_as_GpuArrayVariable
(
self
):
return
self
dtype
=
property
(
lambda
s
:
s
.
type
.
dtype
)
broadcastable
=
property
(
lambda
s
:
s
.
type
.
broadcastable
)
ndim
=
property
(
lambda
s
:
s
.
type
.
ndim
)
class
GpuArrayVariable
(
_operators
,
Variable
):
pass
...
...
@@ -276,12 +280,6 @@ theano.compile.register_view_op_c_code(GpuArrayType, """
theano
.
compile
.
register_deep_copy_op_c_code
(
GpuArrayType
,
"""
Py_XDECREF(
%(oname)
s);
%(oname)
s =
new_GpuArray((PyObject *)&GpuArrayType, GpuArray_default_context()
);
%(oname)
s =
pygpu_copy(
%(iname)
s, GA_ANY_ORDER
);
if (!
%(oname)
s) {
%(fail)
s }
int err;
err = GpuArray_copy(&
%(oname)
s->ga, &
%(iname)
s->ga, GA_ANY_ORDER);
if (err != GA_NO_ERROR) {
PyErr_SetString(PyExc_RuntimeError, "Error during copy");
%(fail)
s
}
"""
,
version
=
(
1
,))
"""
,
version
=
(
5
,))
theano/scalar/basic.py
浏览文件 @
8bd900f8
...
...
@@ -3018,7 +3018,7 @@ class Composite(ScalarOp):
rval
=
[]
for
subnode
in
self
.
fgraph
.
toposort
():
try
:
rval
.
append
(
subnode
.
op
.
c_support_code
())
rval
.
append
(
subnode
.
op
.
c_support_code
()
.
strip
()
)
except
gof
.
utils
.
MethodNotDefined
:
pass
# remove duplicate code blocks
...
...
theano/tensor/elemwise.py
浏览文件 @
8bd900f8
...
...
@@ -143,10 +143,6 @@ class DimShuffle(Op):
# list of dimensions of the input to drop
self
.
drop
=
[]
# this maps i before dropping dimensions to j after dropping dimensions
# so self.shuffle can be set properly later on
i2j
=
{}
j
=
0
for
i
,
b
in
enumerate
(
input_broadcastable
):
if
i
not
in
new_order
:
# we want to drop this dimension because it's not a value in
...
...
@@ -158,14 +154,9 @@ class DimShuffle(Op):
raise
ValueError
(
"You cannot drop a non-broadcastable dimension."
,
(
input_broadcastable
,
new_order
))
else
:
i2j
[
i
]
=
j
j
+=
1
# transposition of non-broadcastable dimensions
# This is how the dimensions will be permuted, without accounting for
# the extra 'x' broadcastable dimensions to insert.
self
.
shuffle
=
[
i2j
[
x
]
for
x
in
new_order
if
x
!=
'x'
]
# this is the list of the original dimensions that we keep
self
.
shuffle
=
[
x
for
x
in
new_order
if
x
!=
'x'
]
# list of dimensions of the output that are broadcastable and were not
# in the original input
...
...
@@ -237,16 +228,12 @@ class DimShuffle(Op):
res
=
input
if
type
(
res
)
!=
numpy
.
ndarray
and
type
(
res
)
!=
numpy
.
memmap
:
raise
TypeError
(
res
)
shape
=
list
(
res
.
shape
)
for
drop
in
reversed
(
self
.
drop
):
shape
.
pop
(
drop
)
res
=
res
.
reshape
(
shape
)
# transpose
res
=
res
.
transpose
(
self
.
shuffle
)
res
=
res
.
transpose
(
self
.
shuffle
+
self
.
drop
)
# augment
shape
=
list
(
res
.
shape
)
shape
=
list
(
res
.
shape
[:
len
(
self
.
shuffle
)]
)
for
augm
in
self
.
augment
:
shape
.
insert
(
augm
,
1
)
res
=
res
.
reshape
(
shape
)
...
...
@@ -259,9 +246,6 @@ class DimShuffle(Op):
def
infer_shape
(
self
,
node
,
shapes
):
ishp
,
=
shapes
ishp
=
list
(
ishp
)
for
drop
in
reversed
(
self
.
drop
):
del
ishp
[
drop
]
# transpose
rval
=
[
ishp
[
i
]
for
i
in
self
.
shuffle
]
...
...
theano/tensor/opt.py
浏览文件 @
8bd900f8
...
...
@@ -410,9 +410,9 @@ def local_dimshuffle_lift(node):
inode
=
input
.
owner
if
inode
and
isinstance
(
inode
.
op
,
Elemwise
)
and
(
len
(
input
.
clients
)
==
1
):
# Don't use make_node to have tag.test_value set.
ret
=
inode
.
op
(
*
[
DimShuffle
(
inp
.
type
.
broadcastable
,
op
.
new_order
,
op
.
inplace
)(
inp
)
for
inp
in
ret
=
inode
.
op
(
*
[
op
.
__class__
(
inp
.
type
.
broadcastable
,
op
.
new_order
,
op
.
inplace
)(
inp
)
for
inp
in
inode
.
inputs
],
**
dict
(
return_list
=
True
))
return
ret
if
inode
and
isinstance
(
inode
.
op
,
DimShuffle
):
...
...
@@ -424,8 +424,8 @@ def local_dimshuffle_lift(node):
iinput
.
type
.
ndim
):
return
[
iinput
]
else
:
ret
=
DimShuffle
(
iinput
.
type
.
broadcastable
,
new_order
,
inplace
)(
iinput
,
**
dict
(
return_list
=
True
))
ret
=
op
.
__class__
(
iinput
.
type
.
broadcastable
,
new_order
,
inplace
)(
iinput
,
**
dict
(
return_list
=
True
))
return
ret
...
...
@@ -460,7 +460,7 @@ def dimshuffle_as_view(node):
op
=
node
.
op
if
not
isinstance
(
op
,
DimShuffle
)
or
op
.
inplace
:
return
False
new_op
=
DimShuffle
(
op
.
input_broadcastable
,
op
.
new_order
,
inplace
=
True
)
new_op
=
op
.
__class__
(
op
.
input_broadcastable
,
op
.
new_order
,
inplace
=
True
)
return
[
new_op
(
*
node
.
inputs
)]
#Step 60 is the inplace optimization stage.
...
...
@@ -4609,7 +4609,7 @@ def local_elemwise_fusion_op(OP, max_input_fct=lambda node: 1024):
# worthwhile if the summation axis doesn't line up with a
# contiguous dimension)
if
not
isinstance
(
node
.
op
,
OP
)
:
if
type
(
node
.
op
)
is
not
OP
:
return
False
inputs
=
[]
# inputs of the new Elemwise op.
s_inputs
=
[]
# inputs of the new scalar op used by the Composite.
...
...
theano/tensor/tests/test_basic.py
浏览文件 @
8bd900f8
...
...
@@ -44,7 +44,7 @@ from theano.tensor import (_shared, wvector, bvector, autocast_float_as,
dtensor3
,
SpecifyShape
,
Mean
,
itensor3
,
Tile
,
switch
,
Diagonal
,
Diag
,
nonzero
,
flatnonzero
,
nonzero_values
,
stacklists
)
stacklists
,
DimShuffle
)
from
theano.tests
import
unittest_tools
as
utt
...
...
@@ -4204,9 +4204,30 @@ class T_op_cache(unittest.TestCase):
self
.
assertTrue
(
numpy
.
all
(
fn_py
(
a
)
==
fn_c_or_py
(
a
)))
class
T_reshape
(
unittest
.
TestCase
):
def
setUp
(
self
):
utt
.
seed_rng
()
class
T_reshape
(
utt
.
InferShapeTester
,
utt
.
TestOptimizationMixin
):
def
__init__
(
self
,
name
,
shared
=
tensor
.
_shared
,
op
=
Reshape
,
mode
=
None
,
ignore_topo
=
(
DeepCopyOp
,
opt
.
MakeVector
,
opt
.
Shape_i
,
DimShuffle
,
theano
.
tensor
.
Elemwise
)):
self
.
shared
=
shared
self
.
op
=
op
#The tag canonicalize is needed for the shape test in FAST_COMPILE
self
.
mode
=
mode
self
.
ignore_topo
=
ignore_topo
return
super
(
T_reshape
,
self
)
.
__init__
(
name
)
def
function
(
self
,
inputs
,
outputs
):
f
=
function
(
inputs
,
outputs
,
mode
=
self
.
mode
)
if
self
.
mode
is
not
None
or
theano
.
config
.
mode
!=
"FAST_COMPILE"
:
topo
=
f
.
maker
.
fgraph
.
toposort
()
topo_
=
[
node
for
node
in
topo
if
not
isinstance
(
node
.
op
,
self
.
ignore_topo
)]
assert
len
(
topo_
)
==
1
,
topo_
return
f
def
eval_output_and_check
(
self
,
t
):
f
=
self
.
function
([],
t
)
tval
=
f
()
return
tval
def
test_reshape
(
self
):
a
=
dvector
()
...
...
@@ -4215,7 +4236,7 @@ class T_reshape(unittest.TestCase):
#basic to 1 dim(without list)
c
=
reshape
(
b
,
as_tensor_variable
(
6
),
ndim
=
1
)
f
=
inplace_func
([
b
],
c
)
f
=
self
.
function
([
b
],
c
)
b_val1
=
numpy
.
asarray
([[
0
,
1
,
2
],
[
3
,
4
,
5
]])
c_val1
=
numpy
.
asarray
([
0
,
1
,
2
,
3
,
4
,
5
])
...
...
@@ -4231,7 +4252,7 @@ class T_reshape(unittest.TestCase):
#basic to 1 dim(with list)
c
=
reshape
(
b
,
(
as_tensor_variable
(
6
),),
ndim
=
1
)
f
=
inplace_func
([
b
],
c
)
f
=
self
.
function
([
b
],
c
)
assert
numpy
.
all
(
f
(
numpy
.
asarray
([[
0
,
1
,
2
],
[
3
,
4
,
5
]]))
==
numpy
.
asarray
([
0
,
1
,
2
,
3
,
4
,
5
]))
#print f.maker.fgraph.toposort()
...
...
@@ -4239,14 +4260,14 @@ class T_reshape(unittest.TestCase):
#basic to shape object of same ndim
c
=
reshape
(
b
,
d
.
shape
)
f
=
inplace_func
([
b
,
d
],
c
)
f
=
self
.
function
([
b
,
d
],
c
)
assert
numpy
.
all
(
f
(
numpy
.
asarray
([[
0
,
1
,
2
],
[
3
,
4
,
5
]]),
[[
0
,
1
],
[
2
,
3
],
[
4
,
5
]])
==
numpy
.
asarray
([[
0
,
1
],
[
2
,
3
],
[
4
,
5
]]))
#basic to 2 dims
c
=
reshape
(
a
,
[
2
,
3
])
f
=
inplace_func
([
a
],
c
)
f
=
self
.
function
([
a
],
c
)
assert
numpy
.
all
(
f
(
numpy
.
asarray
([
0
,
1
,
2
,
3
,
4
,
5
]))
==
numpy
.
asarray
([[
0
,
1
,
2
],
[
3
,
4
,
5
]]))
...
...
@@ -4255,7 +4276,7 @@ class T_reshape(unittest.TestCase):
a_val_copy
=
numpy
.
asarray
([
0
,
1
,
2
,
3
,
4
,
5
])
b_val
=
numpy
.
asarray
([[
0
,
1
,
2
],
[
3
,
4
,
5
]])
f_sub
=
inplace_func
([
a
,
b
],
c
-
b
)
f_sub
=
self
.
function
([
a
,
b
],
c
-
b
)
assert
numpy
.
all
(
f_sub
(
a_val
,
b_val
)
==
0.0
)
assert
numpy
.
all
(
a_val
==
a_val_copy
)
...
...
@@ -4264,35 +4285,33 @@ class T_reshape(unittest.TestCase):
a_val_copy
=
theano
.
_asarray
([
0
,
1
,
2
,
3
,
4
,
5
],
dtype
=
'float64'
)
b_val
=
theano
.
_asarray
([[
0
,
1
,
2
],
[
3
,
4
,
5
]],
dtype
=
'float64'
)
f_sub
=
inplace_func
([
a
,
b
],
c
-
b
)
f_sub
=
self
.
function
([
a
,
b
],
c
-
b
)
assert
numpy
.
all
(
f_sub
(
a_val
,
b_val
)
==
0.0
)
assert
numpy
.
all
(
a_val
==
a_val_copy
)
# verify gradient
def
just_vals
(
v
):
return
Reshape
(
2
)(
v
,
theano
.
_asarray
([
2
,
3
],
dtype
=
'int32'
))
utt
.
verify_grad
(
just_vals
,
[
a_val
])
utt
.
verify_grad
(
just_vals
,
[
a_val
]
,
mode
=
self
.
mode
)
#test infer_shape
f_sub
=
function
([
a
,
b
],
(
c
-
b
)
.
shape
)
if
config
.
mode
==
"FAST_COMPILE"
:
assert
len
(
f_sub
.
maker
.
fgraph
.
toposort
())
==
3
else
:
topo
=
f_sub
.
maker
.
fgraph
.
toposort
()
assert
len
(
topo
)
==
1
topo
[
0
]
.
op
==
theano
.
compile
.
function_module
.
deep_copy_op
#assert numpy.all(f_sub(a_val,numpy.asarray([[0,1],[2,3],[4,5]]))==[2,3])#work in FAST_RUN, but fail on other!
#assert numpy.all(f_sub(a_val,numpy.asarray([[0,1],[2,3],[4,5],[6,7]]))==[2,3])#work in FAST_RUN, but fail on other!
self
.
_compile_and_check
([
a
],
[
c
],
(
a_val
,),
self
.
op
)
# test broadcast flag for constant value of 1
c
=
reshape
(
b
,
(
b
.
shape
[
0
],
b
.
shape
[
1
],
1
))
f
=
inplace_func
([
b
],
c
)
f
=
self
.
function
([
b
],
c
)
assert
numpy
.
all
(
f
(
numpy
.
asarray
([[
0
,
1
,
2
],
[
3
,
4
,
5
]]))
==
numpy
.
asarray
([[[
0
],
[
1
],
[
2
]],
[[
3
],
[
4
],
[
5
]]]))
assert
(
f
.
maker
.
fgraph
.
toposort
()[
-
2
]
.
outputs
[
0
]
.
type
.
broadcastable
==
(
False
,
False
,
True
))
assert
numpy
.
all
(
f_sub
(
a_val
,
b_val
)
==
[
2
,
3
])
def
test_m1
(
self
):
t
=
tensor3
()
rng
=
numpy
.
random
.
RandomState
(
seed
=
utt
.
fetch_seed
())
val
=
rng
.
uniform
(
size
=
(
3
,
4
,
5
))
.
astype
(
config
.
floatX
)
for
out
in
[
t
.
reshape
([
-
1
]),
t
.
reshape
([
-
1
,
5
]),
t
.
reshape
([
5
,
-
1
]),
t
.
reshape
([
5
,
-
1
,
3
])]:
self
.
_compile_and_check
([
t
],
[
out
],
[
val
],
self
.
op
)
def
test_reshape_long_in_shape
(
self
):
v
=
dvector
(
'v'
)
...
...
@@ -4311,14 +4330,14 @@ class T_reshape(unittest.TestCase):
r
=
a
.
reshape
(
shapes
,
ndim
=
1
)
z
=
zeros_like
(
r
)
f
=
function
([
a
,
shapes
],
z
.
shape
)
f
=
self
.
function
([
a
,
shapes
],
z
.
shape
)
self
.
assertRaises
(
ValueError
,
f
,
a_val
,
[
13
])
#Test reshape to 2 dim
r
=
a
.
reshape
(
shapes
,
ndim
=
2
)
z
=
zeros_like
(
r
)
f
=
function
([
a
,
shapes
],
z
.
shape
)
f
=
self
.
function
([
a
,
shapes
],
z
.
shape
)
self
.
assertRaises
(
ValueError
,
f
,
a_val
,
[
-
1
,
5
])
self
.
assertRaises
(
ValueError
,
f
,
a_val
,
[
7
,
-
1
])
...
...
theano/tensor/tests/test_elemwise.py
浏览文件 @
8bd900f8
...
...
@@ -11,7 +11,7 @@ from theano.gof.python25 import all, any
from
theano
import
gof
,
scalar
,
config
from
theano
import
tensor
from
theano.tensor
import
TensorType
from
theano.tensor
import
TensorType
,
as_tensor_variable
from
theano.compile.mode
import
get_default_mode
from
theano.tensor.elemwise
import
(
CAReduce
,
Elemwise
,
DimShuffle
,
Prod
,
ProdWithoutZeros
)
...
...
@@ -24,6 +24,7 @@ def FunctionGraph(i, o):
class
test_DimShuffle
(
unittest_tools
.
InferShapeTester
):
op
=
DimShuffle
def
with_linker
(
self
,
linker
):
for
xsh
,
shuffle
,
zsh
in
[((
2
,
3
),
(
1
,
'x'
,
0
),
(
3
,
1
,
2
)),
...
...
@@ -38,12 +39,12 @@ class test_DimShuffle(unittest_tools.InferShapeTester):
((
1
,),
(
'x'
,
'x'
),
(
1
,
1
))]:
ib
=
[(
entry
==
1
)
for
entry
in
xsh
]
x
=
TensorType
(
'float64'
,
ib
)(
'x'
)
e
=
DimShuffle
(
ib
,
shuffle
)(
x
)
e
=
self
.
op
(
ib
,
shuffle
)(
x
)
f
=
copy
(
linker
)
.
accept
(
FunctionGraph
([
x
],
[
e
]))
.
make_function
()
assert
f
(
numpy
.
ones
(
xsh
))
.
shape
==
zsh
#test that DimShuffle.infer_shape work correctly
x
=
TensorType
(
'float64'
,
ib
)(
'x'
)
e
=
DimShuffle
(
ib
,
shuffle
)(
x
)
e
=
self
.
op
(
ib
,
shuffle
)(
x
)
f
=
copy
(
linker
)
.
accept
(
FunctionGraph
([
x
],
[
e
.
shape
]))
.
make_function
()
assert
all
(
f
(
numpy
.
ones
(
xsh
)))
==
all
(
zsh
)
...
...
@@ -51,12 +52,12 @@ class test_DimShuffle(unittest_tools.InferShapeTester):
# Test when we drop a axis that is not broadcastable
ib
=
[
False
,
True
,
False
]
x
=
TensorType
(
'float64'
,
ib
)(
'x'
)
self
.
assertRaises
(
ValueError
,
DimShuffle
,
ib
,
shuffle
)
self
.
assertRaises
(
ValueError
,
self
.
op
,
ib
,
shuffle
)
# Test when we drop a axis that don't have shape 1
ib
=
[
True
,
True
,
False
]
x
=
TensorType
(
'float64'
,
ib
)(
'x'
)
e
=
DimShuffle
(
ib
,
(
1
,
2
))(
x
)
e
=
self
.
op
(
ib
,
(
1
,
2
))(
x
)
f
=
copy
(
linker
)
.
accept
(
FunctionGraph
([
x
],
[
e
.
shape
]))
.
make_function
()
self
.
assertRaises
(
TypeError
,
f
,
numpy
.
ones
((
2
,
1
,
4
)))
...
...
@@ -89,8 +90,8 @@ class test_DimShuffle(unittest_tools.InferShapeTester):
adtens
=
TensorType
(
'float64'
,
ib
)(
'x'
)
adtens_val
=
numpy
.
ones
(
xsh
)
self
.
_compile_and_check
([
adtens
],
[
DimShuffle
(
ib
,
shuffle
)(
adtens
)],
[
adtens_val
],
DimShuffle
,
[
self
.
op
(
ib
,
shuffle
)(
adtens
)],
[
adtens_val
],
self
.
op
,
warn
=
False
)
def
test_too_big_rank
(
self
):
...
...
@@ -137,10 +138,23 @@ class test_reduce_axes(unittest.TestCase):
m
=
x
.
var
(
a
)
class
test_Broadcast
(
unittest
.
TestCase
):
# this is to allow other types to reuse this class to test their ops
type
=
TensorType
op
=
Elemwise
ctype
=
TensorType
cop
=
Elemwise
def
rand_val
(
self
,
shp
):
return
numpy
.
asarray
(
numpy
.
random
.
rand
(
*
shp
))
def
rand_cval
(
self
,
shp
):
return
numpy
.
asarray
(
numpy
.
random
.
rand
(
*
shp
))
def
setUp
(
self
):
unittest_tools
.
seed_rng
()
def
with_linker
(
self
,
linker
):
def
with_linker
(
self
,
linker
,
op
,
type
,
rand_val
):
for
xsh
,
ysh
in
[((
3
,
5
),
(
3
,
5
)),
((
3
,
5
),
(
1
,
5
)),
((
3
,
5
),
(
3
,
1
)),
...
...
@@ -150,12 +164,12 @@ class test_Broadcast(unittest.TestCase):
((
2
,
3
,
4
,
5
),
(
1
,
3
,
1
,
5
)),
((
2
,
3
,
4
,
5
),
(
1
,
1
,
1
,
1
)),
((),
())]:
x
=
TensorT
ype
(
'float64'
,
[(
entry
==
1
)
for
entry
in
xsh
])(
'x'
)
y
=
TensorT
ype
(
'float64'
,
[(
entry
==
1
)
for
entry
in
ysh
])(
'y'
)
e
=
Elemwise
(
scalar
.
add
)(
x
,
y
)
x
=
t
ype
(
'float64'
,
[(
entry
==
1
)
for
entry
in
xsh
])(
'x'
)
y
=
t
ype
(
'float64'
,
[(
entry
==
1
)
for
entry
in
ysh
])(
'y'
)
e
=
op
(
scalar
.
add
)(
x
,
y
)
f
=
copy
(
linker
)
.
accept
(
FunctionGraph
([
x
,
y
],
[
e
]))
.
make_function
()
xv
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
*
xsh
)
)
yv
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
*
ysh
)
)
xv
=
rand_val
(
xsh
)
yv
=
rand_val
(
ysh
)
zv
=
xv
+
yv
self
.
assertTrue
((
f
(
xv
,
yv
)
==
zv
)
.
all
())
...
...
@@ -163,14 +177,14 @@ class test_Broadcast(unittest.TestCase):
#test Elemwise.infer_shape
#the Shape op don't implement c_code!
if
isinstance
(
linker
,
gof
.
PerformLinker
):
x
=
TensorT
ype
(
'float64'
,
[(
entry
==
1
)
for
entry
in
xsh
])(
'x'
)
y
=
TensorT
ype
(
'float64'
,
[(
entry
==
1
)
for
entry
in
ysh
])(
'y'
)
e
=
Elemwise
(
scalar
.
add
)(
x
,
y
)
x
=
t
ype
(
'float64'
,
[(
entry
==
1
)
for
entry
in
xsh
])(
'x'
)
y
=
t
ype
(
'float64'
,
[(
entry
==
1
)
for
entry
in
ysh
])(
'y'
)
e
=
op
(
scalar
.
add
)(
x
,
y
)
f
=
copy
(
linker
)
.
accept
(
FunctionGraph
([
x
,
y
],
[
e
.
shape
]))
.
make_function
()
assert
tuple
(
f
(
xv
,
yv
))
==
tuple
(
zv
.
shape
)
def
with_linker_inplace
(
self
,
linker
):
def
with_linker_inplace
(
self
,
linker
,
op
,
type
,
rand_val
):
for
xsh
,
ysh
in
[((
5
,
5
),
(
5
,
5
)),
((
5
,
5
),
(
1
,
5
)),
((
5
,
5
),
(
5
,
1
)),
...
...
@@ -179,12 +193,12 @@ class test_Broadcast(unittest.TestCase):
((
2
,
3
,
4
,
5
),
(
1
,
3
,
1
,
5
)),
((
2
,
3
,
4
,
5
),
(
1
,
1
,
1
,
1
)),
((),
())]:
x
=
TensorT
ype
(
'float64'
,
[(
entry
==
1
)
for
entry
in
xsh
])(
'x'
)
y
=
TensorT
ype
(
'float64'
,
[(
entry
==
1
)
for
entry
in
ysh
])(
'y'
)
e
=
Elemwise
(
scalar
.
Add
(
scalar
.
transfer_type
(
0
)),
{
0
:
0
})(
x
,
y
)
x
=
t
ype
(
'float64'
,
[(
entry
==
1
)
for
entry
in
xsh
])(
'x'
)
y
=
t
ype
(
'float64'
,
[(
entry
==
1
)
for
entry
in
ysh
])(
'y'
)
e
=
op
(
scalar
.
Add
(
scalar
.
transfer_type
(
0
)),
{
0
:
0
})(
x
,
y
)
f
=
copy
(
linker
)
.
accept
(
FunctionGraph
([
x
,
y
],
[
e
]))
.
make_function
()
xv
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
*
xsh
)
)
yv
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
*
ysh
)
)
xv
=
rand_val
(
xsh
)
yv
=
rand_val
(
ysh
)
zv
=
xv
+
yv
f
(
xv
,
yv
)
...
...
@@ -193,13 +207,13 @@ class test_Broadcast(unittest.TestCase):
#test Elemwise.infer_shape
#the Shape op don't implement c_code!
if
isinstance
(
linker
,
gof
.
PerformLinker
):
x
=
TensorT
ype
(
'float64'
,
[(
entry
==
1
)
for
entry
in
xsh
])(
'x'
)
y
=
TensorT
ype
(
'float64'
,
[(
entry
==
1
)
for
entry
in
ysh
])(
'y'
)
e
=
Elemwise
(
scalar
.
Add
(
scalar
.
transfer_type
(
0
)),
{
0
:
0
})(
x
,
y
)
x
=
t
ype
(
'float64'
,
[(
entry
==
1
)
for
entry
in
xsh
])(
'x'
)
y
=
t
ype
(
'float64'
,
[(
entry
==
1
)
for
entry
in
ysh
])(
'y'
)
e
=
op
(
scalar
.
Add
(
scalar
.
transfer_type
(
0
)),
{
0
:
0
})(
x
,
y
)
f
=
copy
(
linker
)
.
accept
(
FunctionGraph
([
x
,
y
],
[
e
.
shape
]))
.
make_function
()
xv
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
*
xsh
)
)
yv
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
*
ysh
)
)
xv
=
rand_val
(
xsh
)
yv
=
rand_val
(
ysh
)
zv
=
xv
+
yv
f
(
xv
,
yv
)
...
...
@@ -207,30 +221,33 @@ class test_Broadcast(unittest.TestCase):
assert
xv
.
shape
==
zv
.
shape
def
test_perform
(
self
):
self
.
with_linker
(
gof
.
PerformLinker
())
self
.
with_linker
(
gof
.
PerformLinker
(),
self
.
op
,
self
.
type
,
self
.
rand_val
)
def
test_c
(
self
):
if
not
theano
.
config
.
cxx
:
raise
SkipTest
(
"G++ not available, so we need to skip this test."
)
self
.
with_linker
(
gof
.
CLinker
())
self
.
with_linker
(
gof
.
CLinker
()
,
self
.
cop
,
self
.
ctype
,
self
.
rand_cval
)
def
test_perform_inplace
(
self
):
self
.
with_linker_inplace
(
gof
.
PerformLinker
())
self
.
with_linker_inplace
(
gof
.
PerformLinker
(),
self
.
op
,
self
.
type
,
self
.
rand_val
)
def
test_c_inplace
(
self
):
if
not
theano
.
config
.
cxx
:
raise
SkipTest
(
"G++ not available, so we need to skip this test."
)
self
.
with_linker_inplace
(
gof
.
CLinker
())
self
.
with_linker_inplace
(
gof
.
CLinker
(),
self
.
cop
,
self
.
ctype
,
self
.
rand_cval
)
def
test_fill
(
self
):
if
not
theano
.
config
.
cxx
:
raise
SkipTest
(
"G++ not available, so we need to skip this test."
)
x
=
TensorT
ype
(
'float64'
,
[
0
,
0
])(
'x'
)
y
=
TensorT
ype
(
'float64'
,
[
1
,
1
])(
'y'
)
e
=
Elemwise
(
scalar
.
Second
(
scalar
.
transfer_type
(
0
)),
{
0
:
0
})(
x
,
y
)
x
=
self
.
ct
ype
(
'float64'
,
[
0
,
0
])(
'x'
)
y
=
self
.
ct
ype
(
'float64'
,
[
1
,
1
])(
'y'
)
e
=
self
.
cop
(
scalar
.
Second
(
scalar
.
transfer_type
(
0
)),
{
0
:
0
})(
x
,
y
)
f
=
gof
.
CLinker
()
.
accept
(
FunctionGraph
([
x
,
y
],
[
e
]))
.
make_function
()
xv
=
numpy
.
ones
((
5
,
5
))
yv
=
numpy
.
random
.
rand
(
1
,
1
)
xv
=
self
.
rand_cval
((
5
,
5
))
yv
=
self
.
rand_cval
((
1
,
1
)
)
f
(
xv
,
yv
)
assert
(
xv
==
yv
)
.
all
()
...
...
@@ -245,27 +262,28 @@ class test_Broadcast(unittest.TestCase):
def
test_weird_strides
(
self
):
if
not
theano
.
config
.
cxx
:
raise
SkipTest
(
"G++ not available, so we need to skip this test."
)
x
=
TensorT
ype
(
'float64'
,
[
0
,
0
,
0
,
0
,
0
])(
'x'
)
y
=
TensorT
ype
(
'float64'
,
[
0
,
0
,
0
,
0
,
0
])(
'y'
)
e
=
Elemwise
(
scalar
.
add
)(
x
,
y
)
x
=
self
.
ct
ype
(
'float64'
,
[
0
,
0
,
0
,
0
,
0
])(
'x'
)
y
=
self
.
ct
ype
(
'float64'
,
[
0
,
0
,
0
,
0
,
0
])(
'y'
)
e
=
self
.
cop
(
scalar
.
add
)(
x
,
y
)
f
=
gof
.
CLinker
()
.
accept
(
FunctionGraph
([
x
,
y
],
[
e
]))
.
make_function
()
xv
=
numpy
.
random
.
rand
(
2
,
2
,
2
,
2
,
2
)
yv
=
numpy
.
random
.
rand
(
2
,
2
,
2
,
2
,
2
)
.
transpose
(
4
,
0
,
3
,
1
,
2
)
xv
=
self
.
rand_cval
((
2
,
2
,
2
,
2
,
2
)
)
yv
=
self
.
rand_cval
((
2
,
2
,
2
,
2
,
2
)
)
.
transpose
(
4
,
0
,
3
,
1
,
2
)
zv
=
xv
+
yv
assert
(
f
(
xv
,
yv
)
==
zv
)
.
all
()
def
test_same_inputs
(
self
):
if
not
theano
.
config
.
cxx
:
raise
SkipTest
(
"G++ not available, so we need to skip this test."
)
x
=
TensorT
ype
(
'float64'
,
[
0
,
0
])(
'x'
)
e
=
Elemwise
(
scalar
.
add
)(
x
,
x
)
x
=
self
.
ct
ype
(
'float64'
,
[
0
,
0
])(
'x'
)
e
=
self
.
cop
(
scalar
.
add
)(
x
,
x
)
f
=
gof
.
CLinker
()
.
accept
(
FunctionGraph
([
x
],
[
e
]))
.
make_function
()
xv
=
numpy
.
random
.
rand
(
2
,
2
)
xv
=
self
.
rand_cval
((
2
,
2
)
)
zv
=
xv
+
xv
assert
(
f
(
xv
)
==
zv
)
.
all
()
class
test_CAReduce
(
unittest_tools
.
InferShapeTester
):
op
=
CAReduce
def
with_linker
(
self
,
linker
,
scalar_op
=
scalar
.
add
,
dtype
=
"floatX"
,
test_nan
=
False
,
tensor_op
=
None
):
...
...
@@ -288,9 +306,9 @@ class test_CAReduce(unittest_tools.InferShapeTester):
dtype
=
theano
.
config
.
floatX
x
=
TensorType
(
dtype
,
[(
entry
==
1
)
for
entry
in
xsh
])(
'x'
)
if
tensor_op
is
None
:
e
=
CAReduce
(
scalar_op
,
axis
=
tosum
)(
x
)
e
=
as_tensor_variable
(
self
.
op
(
scalar_op
,
axis
=
tosum
)(
x
)
)
else
:
e
=
tensor_op
(
x
,
axis
=
tosum
)
e
=
as_tensor_variable
(
tensor_op
(
x
,
axis
=
tosum
)
)
if
tosum
is
None
:
tosum
=
range
(
len
(
xsh
))
...
...
@@ -395,7 +413,7 @@ class test_CAReduce(unittest_tools.InferShapeTester):
if
isinstance
(
linker
,
gof
.
PerformLinker
):
x
=
TensorType
(
dtype
,
[(
entry
==
1
)
for
entry
in
xsh
])(
'x'
)
if
tensor_op
is
None
:
e
=
CAReduce
(
scalar_op
,
axis
=
tosum
)(
x
)
e
=
self
.
op
(
scalar_op
,
axis
=
tosum
)(
x
)
else
:
e
=
tensor_op
(
x
,
axis
=
tosum
)
if
tosum
is
None
:
...
...
@@ -491,8 +509,8 @@ class test_CAReduce(unittest_tools.InferShapeTester):
tosum
=
range
(
len
(
xsh
))
xv
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
*
xsh
),
dtype
=
dtype
)
self
.
_compile_and_check
([
x
],
[
CAReduce
(
scalar
.
add
,
axis
=
tosum
)(
x
)],
[
xv
],
CAReduce
,
[
"local_cut_useless_reduce"
])
[
self
.
op
(
scalar
.
add
,
axis
=
tosum
)(
x
)],
[
xv
],
self
.
op
,
[
"local_cut_useless_reduce"
])
class
test_Prod
(
unittest
.
TestCase
):
...
...
theano/tensor/tests/test_subtensor.py
浏览文件 @
8bd900f8
...
...
@@ -122,10 +122,9 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
try
:
try
:
self
.
eval_output_and_check
(
t
)
assert
0
except
Exception
,
e
:
if
'out of bounds'
not
in
exc_message
(
e
):
raise
except
IndexError
,
e
:
return
self
.
fail
()
finally
:
_logger
.
setLevel
(
oldlevel
)
...
...
@@ -161,7 +160,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
def
test1_0_dims
(
self
):
n
=
self
.
shared
(
numpy
.
ones
((),
dtype
=
self
.
dtype
))
t
=
theano
.
tensor
.
Subtensor
([])(
n
)
t
=
self
.
sub
([])(
n
)
self
.
assertTrue
(
isinstance
(
t
.
owner
.
op
,
Subtensor
))
mode
=
self
.
mode
self
.
mode
=
mode
.
excluding
(
"local_useless_subtensor"
)
...
...
@@ -188,7 +187,6 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
self
.
assertTrue
(
tval
==
5.0
)
def
test1_ok_range_infinite
(
self
):
#Subtensor.debug = True
n
=
self
.
shared
(
numpy
.
arange
(
3
,
dtype
=
self
.
dtype
))
t
=
n
[
1
:]
self
.
assertTrue
(
isinstance
(
t
.
owner
.
op
,
Subtensor
))
...
...
theano/tensor/var.py
浏览文件 @
8bd900f8
...
...
@@ -543,8 +543,8 @@ class _tensor_py_operators:
def
get_scalar_constant_value
(
self
):
return
theano
.
tensor
.
basic
.
get_scalar_constant_value
(
self
)
def
zeros_like
(
self
,
dtype
=
None
):
return
theano
.
tensor
.
basic
.
zeros_like
(
self
,
dtype
=
dtype
)
def
zeros_like
(
model
,
dtype
=
None
):
return
theano
.
tensor
.
basic
.
zeros_like
(
model
,
dtype
=
dtype
)
class
TensorVariable
(
_tensor_py_operators
,
Variable
):
...
...
theano/tests/unittest_tools.py
浏览文件 @
8bd900f8
...
...
@@ -182,7 +182,10 @@ class InferShapeTester(unittest.TestCase):
def
setUp
(
self
):
seed_rng
()
# Take into account any mode that may be defined in a child class
mode
=
getattr
(
self
,
'mode'
,
theano
.
compile
.
get_default_mode
())
# and it can be None
mode
=
getattr
(
self
,
'mode'
,
None
)
if
mode
is
None
:
mode
=
theano
.
compile
.
get_default_mode
()
# This mode seems to be the minimal one including the shape_i
# optimizations, if we don't want to enumerate them explicitly.
self
.
mode
=
mode
.
including
(
"canonicalize"
)
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论