Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
5df0cfd8
提交
5df0cfd8
authored
7月 01, 2017
作者:
Frédéric Bastien
提交者:
GitHub
7月 01, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #6091 from notoraptor/params-for-other-ops
Params for other ops
上级
49d99209
61cb8c41
全部展开
显示空白字符变更
内嵌
并排
正在显示
7 个修改的文件
包含
114 行增加
和
85 行删除
+114
-85
ops.py
theano/compile/ops.py
+16
-3
extra_ops.py
theano/gpuarray/extra_ops.py
+19
-18
neighbours.py
theano/gpuarray/neighbours.py
+0
-0
rng_mrg.py
theano/gpuarray/rng_mrg.py
+35
-24
subtensor.py
theano/gpuarray/subtensor.py
+0
-0
neighbours.py
theano/tensor/nnet/neighbours.py
+31
-26
subtensor.py
theano/tensor/subtensor.py
+13
-14
没有找到文件。
theano/compile/ops.py
浏览文件 @
5df0cfd8
...
@@ -346,6 +346,18 @@ class Shape_i(gof.Op):
...
@@ -346,6 +346,18 @@ class Shape_i(gof.Op):
i
=
int
(
i
)
i
=
int
(
i
)
self
.
i
=
i
self
.
i
=
i
# NB:
# 1) params_type is defined as a property to avoid
# a circular Python import caused by importing theano.scalar below,
# which would occur if params_type were defined directly in the class body.
# 2) We wrap scalar into ParamsType (instead of directly using scalar as op param)
# to avoid Theano converting scalar param to constant that would be later
# hardcoded as a literal in C code, making us lose all the advantages of
# using params.
@property
def
params_type
(
self
):
return
gof
.
ParamsType
(
i
=
theano
.
scalar
.
basic
.
int64
)
def
__str__
(
self
):
def
__str__
(
self
):
return
'
%
s{
%
i}'
%
(
self
.
__class__
.
__name__
,
self
.
i
)
return
'
%
s{
%
i}'
%
(
self
.
__class__
.
__name__
,
self
.
i
)
...
@@ -360,7 +372,7 @@ class Shape_i(gof.Op):
...
@@ -360,7 +372,7 @@ class Shape_i(gof.Op):
(
x
,
self
.
i
))
(
x
,
self
.
i
))
return
theano
.
Apply
(
self
,
[
x
],
[
theano
.
tensor
.
lscalar
()])
return
theano
.
Apply
(
self
,
[
x
],
[
theano
.
tensor
.
lscalar
()])
def
perform
(
self
,
node
,
inp
,
out_
):
def
perform
(
self
,
node
,
inp
,
out_
,
params
):
x
,
=
inp
x
,
=
inp
out
,
=
out_
out
,
=
out_
if
out
[
0
]
is
None
:
if
out
[
0
]
is
None
:
...
@@ -383,7 +395,7 @@ class Shape_i(gof.Op):
...
@@ -383,7 +395,7 @@ class Shape_i(gof.Op):
version
.
append
((
str
(
t
),
v
))
version
.
append
((
str
(
t
),
v
))
if
version
:
if
version
:
version
.
append
(
1
)
version
.
append
(
2
)
return
tuple
(
version
)
return
tuple
(
version
)
...
@@ -391,7 +403,8 @@ class Shape_i(gof.Op):
...
@@ -391,7 +403,8 @@ class Shape_i(gof.Op):
iname
,
=
inames
iname
,
=
inames
oname
,
=
onames
oname
,
=
onames
fail
=
sub
[
'fail'
]
fail
=
sub
[
'fail'
]
i
=
self
.
i
# i is then 'params->i', not just 'params'.
i
=
sub
[
'params'
]
+
'->i'
itype
=
node
.
inputs
[
0
]
.
type
.
__class__
itype
=
node
.
inputs
[
0
]
.
type
.
__class__
if
itype
in
self
.
c_code_and_version
:
if
itype
in
self
.
c_code_and_version
:
...
...
theano/gpuarray/extra_ops.py
浏览文件 @
5df0cfd8
...
@@ -10,6 +10,9 @@ except ImportError:
...
@@ -10,6 +10,9 @@ except ImportError:
from
.basic_ops
import
(
as_gpuarray_variable
,
GpuKernelBase
,
Kernel
,
GpuReshape
,
infer_context_name
)
from
.basic_ops
import
(
as_gpuarray_variable
,
GpuKernelBase
,
Kernel
,
GpuReshape
,
infer_context_name
)
from
.opt
import
register_opt
,
op_lifter
,
register_opt2
from
.opt
import
register_opt
,
op_lifter
,
register_opt2
from
.type
import
gpu_context_type
from
theano.gof
import
ParamsType
import
theano.scalar
as
scalar
class
GpuCumOp
(
GpuKernelBase
,
Op
):
class
GpuCumOp
(
GpuKernelBase
,
Op
):
...
@@ -21,9 +24,12 @@ class GpuCumOp(GpuKernelBase, Op):
...
@@ -21,9 +24,12 @@ class GpuCumOp(GpuKernelBase, Op):
"""
"""
SUPPORTED_NDIMS
=
3
SUPPORTED_NDIMS
=
3
__props__
=
(
'axis'
,
'mode'
)
__props__
=
(
'axis'
,
'mode'
)
params_type
=
ParamsType
(
axis
=
scalar
.
int32
,
context
=
gpu_context_type
)
def
__init__
(
self
,
axis
,
mode
=
'add'
):
def
__init__
(
self
,
axis
,
mode
=
'add'
):
self
.
axis
=
axis
if
axis
else
0
assert
axis
is
not
None
self
.
axis
=
int
(
axis
)
self
.
mode
=
mode
self
.
mode
=
mode
def
__eq__
(
self
,
other
):
def
__eq__
(
self
,
other
):
...
@@ -35,7 +41,7 @@ class GpuCumOp(GpuKernelBase, Op):
...
@@ -35,7 +41,7 @@ class GpuCumOp(GpuKernelBase, Op):
return
hash
(
self
.
axis
)
^
hash
(
self
.
mode
)
return
hash
(
self
.
axis
)
^
hash
(
self
.
mode
)
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
6
,)
return
(
7
,)
def
c_headers
(
self
):
def
c_headers
(
self
):
return
[
'<numpy_compat.h>'
,
'<gpuarray/types.h>'
,
'<gpuarray_helper.h>'
]
return
[
'<numpy_compat.h>'
,
'<gpuarray/types.h>'
,
'<gpuarray_helper.h>'
]
...
@@ -43,6 +49,9 @@ class GpuCumOp(GpuKernelBase, Op):
...
@@ -43,6 +49,9 @@ class GpuCumOp(GpuKernelBase, Op):
def
c_header_dirs
(
self
):
def
c_header_dirs
(
self
):
return
[
os
.
path
.
dirname
(
__file__
)]
return
[
os
.
path
.
dirname
(
__file__
)]
def
get_params
(
self
,
node
):
return
self
.
params_type
.
get_params
(
self
,
context
=
node
.
inputs
[
0
]
.
type
.
context
)
def
make_node
(
self
,
x
):
def
make_node
(
self
,
x
):
assert
x
.
type
.
dtype
==
'float32'
,
"Only float32 supported for GpuCumOp"
assert
x
.
type
.
dtype
==
'float32'
,
"Only float32 supported for GpuCumOp"
...
@@ -244,24 +253,18 @@ class GpuCumOp(GpuKernelBase, Op):
...
@@ -244,24 +253,18 @@ class GpuCumOp(GpuKernelBase, Op):
def
c_code
(
self
,
node
,
nodename
,
inp
,
out
,
sub
):
def
c_code
(
self
,
node
,
nodename
,
inp
,
out
,
sub
):
if
node
.
inputs
[
0
]
.
type
.
context
.
kind
!=
b
'cuda'
:
if
node
.
inputs
[
0
]
.
type
.
context
.
kind
!=
b
'cuda'
:
raise
NotImplementedError
(
"cuda only"
)
raise
NotImplementedError
(
"cuda only"
)
x
,
=
inp
return
"""
z
,
=
out
axis
=
self
.
axis
if
self
.
axis
is
not
None
else
0
fail
=
sub
[
'fail'
]
ctx
=
sub
[
'params'
]
code
=
"""
const size_t* shape = PyGpuArray_DIMS(
%(x)
s);
const size_t* shape = PyGpuArray_DIMS(
%(x)
s);
bool needAllocation = !
%(z)
s || PyGpuArray_NDIM(
%(x)
s) != PyGpuArray_NDIM(
%(z)
s);
bool needAllocation = !
%(z)
s || PyGpuArray_NDIM(
%(x)
s) != PyGpuArray_NDIM(
%(z)
s);
int axis =
%(
axis)
s;
int axis =
%(
params)
s->axi
s;
if (axis < 0) {
if (axis < 0) {
// Convert negative axis to positive axis.
// Convert negative axis to positive axis.
axis += PyGpuArray_NDIM(
%(x)
s);
axis += PyGpuArray_NDIM(
%(x)
s);
}
}
if (theano_prep_output(&
%(z)
s, PyGpuArray_NDIM(
%(x)
s), PyGpuArray_DIMS(
%(x)
s),
%(x)
s->ga.typecode, GA_C_ORDER,
%(ctx)
s) != 0){
if (theano_prep_output(&
%(z)
s, PyGpuArray_NDIM(
%(x)
s), PyGpuArray_DIMS(
%(x)
s),
%(x)
s->ga.typecode, GA_C_ORDER,
%(params)
s->context) != 0) {
%(fail)
s;
%(fail)
s;
}
}
...
@@ -270,17 +273,17 @@ class GpuCumOp(GpuKernelBase, Op):
...
@@ -270,17 +273,17 @@ class GpuCumOp(GpuKernelBase, Op):
size_t max_grid_size1;
size_t max_grid_size1;
size_t max_grid_size2;
size_t max_grid_size2;
int err;
int err;
err = gpucontext_property(
%(
ctx)
s
->ctx, GA_CTX_PROP_MAXLSIZE0, &max_threads_dim0);
err = gpucontext_property(
%(
params)
s->context
->ctx, GA_CTX_PROP_MAXLSIZE0, &max_threads_dim0);
if (err != GA_NO_ERROR){
if (err != GA_NO_ERROR){
PyErr_SetString(PyExc_RuntimeError, "Could not fetch max_threads_dims0");
PyErr_SetString(PyExc_RuntimeError, "Could not fetch max_threads_dims0");
%(fail)
s;
%(fail)
s;
}
}
err = gpucontext_property(
%(
ctx)
s
->ctx, GA_CTX_PROP_MAXGSIZE1, &max_grid_size1);
err = gpucontext_property(
%(
params)
s->context
->ctx, GA_CTX_PROP_MAXGSIZE1, &max_grid_size1);
if (err != GA_NO_ERROR){
if (err != GA_NO_ERROR){
PyErr_SetString(PyExc_RuntimeError, "Could not fetch max_grid_size1");
PyErr_SetString(PyExc_RuntimeError, "Could not fetch max_grid_size1");
%(fail)
s;
%(fail)
s;
}
}
err = gpucontext_property(
%(
ctx)
s
->ctx, GA_CTX_PROP_MAXGSIZE2, &max_grid_size2);
err = gpucontext_property(
%(
params)
s->context
->ctx, GA_CTX_PROP_MAXGSIZE2, &max_grid_size2);
if (err != GA_NO_ERROR){
if (err != GA_NO_ERROR){
PyErr_SetString(PyExc_RuntimeError, "Could not fetch max_grid_size2");
PyErr_SetString(PyExc_RuntimeError, "Could not fetch max_grid_size2");
%(fail)
s;
%(fail)
s;
...
@@ -289,9 +292,7 @@ class GpuCumOp(GpuKernelBase, Op):
...
@@ -289,9 +292,7 @@ class GpuCumOp(GpuKernelBase, Op):
%(fail)
s;
%(fail)
s;
}
}
}
}
"""
%
locals
()
"""
%
dict
(
x
=
inp
[
0
],
z
=
out
[
0
],
nodename
=
nodename
,
fail
=
sub
[
'fail'
],
params
=
sub
[
'params'
])
return
code
def
c_support_code_struct
(
self
,
node
,
nodename
):
def
c_support_code_struct
(
self
,
node
,
nodename
):
code
=
"""
code
=
"""
...
...
theano/gpuarray/neighbours.py
浏览文件 @
5df0cfd8
差异被折叠。
点击展开。
theano/gpuarray/rng_mrg.py
浏览文件 @
5df0cfd8
...
@@ -7,16 +7,15 @@ http://www.iro.umontreal.ca/~simardr/ssj/indexe.html
...
@@ -7,16 +7,15 @@ http://www.iro.umontreal.ca/~simardr/ssj/indexe.html
"""
"""
from
__future__
import
absolute_import
,
print_function
,
division
from
__future__
import
absolute_import
,
print_function
,
division
import
numpy
as
np
from
theano
import
Apply
,
tensor
from
theano
import
Apply
,
tensor
from
theano.gof
import
local_optimizer
from
theano.gof
import
local_optimizer
from
theano.sandbox.rng_mrg
import
mrg_uniform_base
,
mrg_uniform
from
theano.sandbox.rng_mrg
import
mrg_uniform_base
,
mrg_uniform
from
theano.tensor
import
as_tensor_variable
,
get_vector_length
from
theano.tensor
import
as_tensor_variable
,
get_vector_length
from
theano.scalar
import
int32
as
int_t
from
.basic_ops
import
(
GpuKernelBase
,
Kernel
,
infer_context_name
,
from
.basic_ops
import
(
GpuKernelBase
,
Kernel
,
infer_context_name
,
host_from_gpu
,
as_gpuarray_variable
)
host_from_gpu
,
as_gpuarray_variable
)
from
.type
import
GpuArrayType
from
.type
import
GpuArrayType
,
gpu_context_type
from
.fp16_help
import
write_w
from
.fp16_help
import
write_w
from
.opt
import
register_opt
,
register_opt2
from
.opt
import
register_opt
,
register_opt2
...
@@ -24,6 +23,9 @@ from .opt import register_opt, register_opt2
...
@@ -24,6 +23,9 @@ from .opt import register_opt, register_opt2
class
GPUA_mrg_uniform
(
GpuKernelBase
,
mrg_uniform_base
):
class
GPUA_mrg_uniform
(
GpuKernelBase
,
mrg_uniform_base
):
# GpuArray version
# GpuArray version
_f16_ok
=
True
_f16_ok
=
True
params_type
=
mrg_uniform_base
.
params_type
.
extended
(
otypecode
=
int_t
,
context
=
gpu_context_type
)
otypecode
=
property
(
lambda
self
:
self
.
output_type
.
typecode
)
def
make_node
(
self
,
rstate
,
size
):
def
make_node
(
self
,
rstate
,
size
):
# error checking slightly redundant here, since
# error checking slightly redundant here, since
...
@@ -39,6 +41,9 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
...
@@ -39,6 +41,9 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
[
rstate
,
size
],
[
rstate
,
size
],
[
rstate
.
type
(),
output_type
])
[
rstate
.
type
(),
output_type
])
def
get_params
(
self
,
node
):
return
self
.
params_type
.
get_params
(
self
,
context
=
node
.
inputs
[
0
]
.
type
.
context
)
@classmethod
@classmethod
def
new
(
cls
,
rstate
,
ndim
,
dtype
,
size
):
def
new
(
cls
,
rstate
,
ndim
,
dtype
,
size
):
v_size
=
as_tensor_variable
(
size
)
v_size
=
as_tensor_variable
(
size
)
...
@@ -168,40 +173,34 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
...
@@ -168,40 +173,34 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
]
]
def
c_code
(
self
,
node
,
nodename
,
inp
,
out
,
sub
):
def
c_code
(
self
,
node
,
nodename
,
inp
,
out
,
sub
):
rstate
,
size
=
inp
o_rstate
,
o_sample
=
out
inplace
=
int
(
self
.
inplace
)
ndim
=
self
.
output_type
.
ndim
o_type_num
=
np
.
asarray
(
0
,
dtype
=
self
.
output_type
.
dtype
)
.
dtype
.
num
fail
=
sub
[
'fail'
]
ctx
=
sub
[
'params'
]
kname
=
self
.
gpu_kernels
(
node
,
nodename
)[
0
]
.
objvar
otypecode
=
str
(
self
.
output_type
.
typecode
)
return
"""
return
"""
npy_int64 M1 = 2147483647; //2^31 - 1
npy_int64 M1 = 2147483647; //2^31 - 1
// The +1 is to avoid odims[0] which fails on windows
size_t odims[
%(ndim)
s+1];
size_t n_elements = 1;
size_t n_elements = 1;
unsigned int n_streams;
unsigned int n_streams;
int must_alloc_sample = ((NULL ==
%(o_sample)
s)
int must_alloc_sample = ((NULL ==
%(o_sample)
s)
|| !pygpu_GpuArray_Check((PyObject*)
%(o_sample)
s)
|| !pygpu_GpuArray_Check((PyObject*)
%(o_sample)
s)
|| !(
%(o_sample)
s->ga.flags & GA_C_CONTIGUOUS)
|| !(
%(o_sample)
s->ga.flags & GA_C_CONTIGUOUS)
|| (PyGpuArray_NDIM(
%(o_sample)
s) !=
%(ndim)
s));
|| (PyGpuArray_NDIM(
%(o_sample)
s) !=
%(params)
s->ndim));
size_t* odims = (size_t*)malloc(
%(params)
s->ndim * sizeof(size_t));
if (odims == NULL) {
PyErr_NoMemory();
%(just_fail)
s
}
if (PyArray_NDIM(
%(size)
s) != 1)
if (PyArray_NDIM(
%(size)
s) != 1)
{
{
PyErr_SetString(PyExc_ValueError, "size must be vector");
PyErr_SetString(PyExc_ValueError, "size must be vector");
%(fail)
s
%(fail)
s
}
}
if (PyArray_DIMS(
%(size)
s)[0] !=
%(
ndim)
s
)
if (PyArray_DIMS(
%(size)
s)[0] !=
%(
params)
s->ndim
)
{
{
PyErr_Format(PyExc_ValueError, "size must have length
%%
i (not
%%
li)",
PyErr_Format(PyExc_ValueError, "size must have length
%%
i (not
%%
li)",
%(
ndim)
s
, PyArray_DIMS(
%(size)
s)[0]);
%(
params)
s->ndim
, PyArray_DIMS(
%(size)
s)[0]);
%(fail)
s
%(fail)
s
}
}
for (int i = 0; i <
%(
ndim)
s
; ++i)
for (int i = 0; i <
%(
params)
s->ndim
; ++i)
{
{
odims[i] = *(dtype_
%(size)
s *)PyArray_GETPTR1(
%(size)
s, i);
odims[i] = *(dtype_
%(size)
s *)PyArray_GETPTR1(
%(size)
s, i);
n_elements *= odims[i];
n_elements *= odims[i];
...
@@ -219,8 +218,8 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
...
@@ -219,8 +218,8 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
if (must_alloc_sample)
if (must_alloc_sample)
{
{
Py_XDECREF(
%(o_sample)
s);
Py_XDECREF(
%(o_sample)
s);
%(o_sample)
s = pygpu_empty(
%(
ndim)
s, odims,
%(otypecode)
s
, GA_C_ORDER,
%(o_sample)
s = pygpu_empty(
%(
params)
s->ndim, odims,
%(params)
s->otypecode
, GA_C_ORDER,
%(
ctx)
s
, Py_None);
%(
params)
s->context
, Py_None);
if(!
%(o_sample)
s)
if(!
%(o_sample)
s)
{
{
%(fail)
s;
%(fail)
s;
...
@@ -233,7 +232,7 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
...
@@ -233,7 +232,7 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
}
}
Py_XDECREF(
%(o_rstate)
s);
Py_XDECREF(
%(o_rstate)
s);
if (
%(
inplace)
s
)
if (
%(
params)
s->inplace
)
{
{
Py_INCREF(
%(rstate)
s);
Py_INCREF(
%(rstate)
s);
%(o_rstate)
s =
%(rstate)
s;
%(o_rstate)
s =
%(rstate)
s;
...
@@ -285,10 +284,22 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
...
@@ -285,10 +284,22 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
%(fail)
s
%(fail)
s
}
}
}
}
"""
%
locals
()
free(odims);
"""
%
dict
(
rstate
=
inp
[
0
],
size
=
inp
[
1
],
o_rstate
=
out
[
0
],
o_sample
=
out
[
1
],
kname
=
self
.
gpu_kernels
(
node
,
nodename
)[
0
]
.
objvar
,
params
=
sub
[
'params'
],
just_fail
=
sub
[
'fail'
],
fail
=
"""
{
free(odims);
%(fail)
s
}
"""
%
dict
(
fail
=
sub
[
'fail'
]))
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
1
4
,)
return
(
1
5
,)
@register_opt2
([
mrg_uniform
],
'fast_compile'
)
@register_opt2
([
mrg_uniform
],
'fast_compile'
)
...
...
theano/gpuarray/subtensor.py
浏览文件 @
5df0cfd8
差异被折叠。
点击展开。
theano/tensor/nnet/neighbours.py
浏览文件 @
5df0cfd8
...
@@ -8,6 +8,7 @@ import numpy as np
...
@@ -8,6 +8,7 @@ import numpy as np
import
theano
import
theano
from
theano
import
Op
,
Apply
from
theano
import
Op
,
Apply
from
theano.gof
import
EnumList
import
theano.tensor
as
T
import
theano.tensor
as
T
from
theano.gradient
import
grad_not_implemented
from
theano.gradient
import
grad_not_implemented
from
theano.gradient
import
grad_undefined
from
theano.gradient
import
grad_undefined
...
@@ -39,13 +40,21 @@ class Images2Neibs(Op):
...
@@ -39,13 +40,21 @@ class Images2Neibs(Op):
"""
"""
__props__
=
(
"mode"
,)
__props__
=
(
"mode"
,)
BORDER_MODE
=
EnumList
((
'MODE_VALID'
,
'valid'
),
(
'MODE_HALF'
,
'half'
),
(
'MODE_FULL'
,
'full'
),
(
'MODE_WRAP_CENTERED'
,
'wrap_centered'
),
(
'MODE_IGNORE_BORDERS'
,
'ignore_borders'
))
params_type
=
BORDER_MODE
def
get_params
(
self
,
node
):
return
self
.
mode
def
__init__
(
self
,
mode
=
'valid'
):
def
__init__
(
self
,
mode
=
'valid'
):
if
mode
not
in
[
'valid'
,
'half'
,
'full'
,
implemented_modes
=
self
.
BORDER_MODE
.
get_aliases
()
'wrap_centered'
,
'ignore_borders'
]:
if
mode
not
in
implemented_modes
:
raise
NotImplementedError
(
"Only the mode valid, half, full, "
raise
NotImplementedError
(
"Only modes
%
s have been implemented for
%
s"
"ignore_borders and wrap_centered have "
%
(
', '
.
join
(
implemented_modes
),
type
(
self
)
.
__name__
))
"been implemented for Images2Neibs"
)
self
.
mode
=
mode
self
.
mode
=
mode
def
__str__
(
self
):
def
__str__
(
self
):
...
@@ -159,9 +168,9 @@ class Images2Neibs(Op):
...
@@ -159,9 +168,9 @@ class Images2Neibs(Op):
grad_undefined
(
self
,
2
,
neib_step
)]
grad_undefined
(
self
,
2
,
neib_step
)]
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
8
,)
return
(
10
,)
def
perform
(
self
,
node
,
inp
,
out_
):
def
perform
(
self
,
node
,
inp
,
out_
,
params
):
ten4
,
neib_shape
,
neib_step
=
inp
ten4
,
neib_shape
,
neib_step
=
inp
z
,
=
out_
z
,
=
out_
# GpuImages2Neibs should not run this perform in DebugMode
# GpuImages2Neibs should not run this perform in DebugMode
...
@@ -344,11 +353,6 @@ class Images2Neibs(Op):
...
@@ -344,11 +353,6 @@ class Images2Neibs(Op):
return
[(
z_dim0
,
z_dim1
)]
return
[(
z_dim0
,
z_dim1
)]
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
ten4
,
neib_shape
,
neib_step
=
inp
z
,
=
out
fail
=
sub
[
'fail'
]
mode
=
self
.
mode
return
"""
return
"""
#ifndef CEIL_INTDIV
#ifndef CEIL_INTDIV
#define CEIL_INTDIV(a, b) ((a/b) + ((a
%%
b) ? 1: 0))
#define CEIL_INTDIV(a, b) ((a/b) + ((a
%%
b) ? 1: 0))
...
@@ -408,7 +412,7 @@ class Images2Neibs(Op):
...
@@ -408,7 +412,7 @@ class Images2Neibs(Op):
%(fail)
s;
%(fail)
s;
}
}
if (
"
%(mode)
s" == "wrap_centered"
) {
if (
%(mode)
s == MODE_WRAP_CENTERED
) {
if (c
%%2
!=1 || d
%%2
!=1){
if (c
%%2
!=1 || d
%%2
!=1){
PyErr_Format(PyExc_TypeError,
PyErr_Format(PyExc_TypeError,
"Images2Neibs: in mode wrap_centered"
"Images2Neibs: in mode wrap_centered"
...
@@ -430,7 +434,7 @@ class Images2Neibs(Op):
...
@@ -430,7 +434,7 @@ class Images2Neibs(Op):
grid_c = CEIL_INTDIV(((PyArray_DIMS(
%(ten4)
s))[2]),step_x);
grid_c = CEIL_INTDIV(((PyArray_DIMS(
%(ten4)
s))[2]),step_x);
grid_d = CEIL_INTDIV(((PyArray_DIMS(
%(ten4)
s))[3]),step_y);
grid_d = CEIL_INTDIV(((PyArray_DIMS(
%(ten4)
s))[3]),step_y);
}
else if ( "
%(mode)
s" == "valid"
) {
}
else if (
%(mode)
s == MODE_VALID
) {
if ( ((PyArray_DIMS(
%(ten4)
s))[2] < c) ||
if ( ((PyArray_DIMS(
%(ten4)
s))[2] < c) ||
( (((PyArray_DIMS(
%(ten4)
s))[2]-c)
%%
step_x)!=0))
( (((PyArray_DIMS(
%(ten4)
s))[2]-c)
%%
step_x)!=0))
{
{
...
@@ -455,12 +459,12 @@ class Images2Neibs(Op):
...
@@ -455,12 +459,12 @@ class Images2Neibs(Op):
grid_c = 1+(((PyArray_DIMS(
%(ten4)
s))[2]-c)/step_x);
grid_c = 1+(((PyArray_DIMS(
%(ten4)
s))[2]-c)/step_x);
//number of patch in width
//number of patch in width
grid_d = 1+(((PyArray_DIMS(
%(ten4)
s))[3]-d)/step_y);
grid_d = 1+(((PyArray_DIMS(
%(ten4)
s))[3]-d)/step_y);
}
else if ( "
%(mode)
s" == "ignore_borders"
) {
}
else if (
%(mode)
s == MODE_IGNORE_BORDERS
) {
//number of patch in height
//number of patch in height
grid_c = 1+(((PyArray_DIMS(
%(ten4)
s))[2]-c)/step_x);
grid_c = 1+(((PyArray_DIMS(
%(ten4)
s))[2]-c)/step_x);
//number of patch in width
//number of patch in width
grid_d = 1+(((PyArray_DIMS(
%(ten4)
s))[3]-d)/step_y);
grid_d = 1+(((PyArray_DIMS(
%(ten4)
s))[3]-d)/step_y);
}
else if ( "
%(mode)
s" == "half"
) {
}
else if (
%(mode)
s == MODE_HALF
) {
if ( ((PyArray_DIMS(
%(ten4)
s))[2] < c) ||
if ( ((PyArray_DIMS(
%(ten4)
s))[2] < c) ||
( (((PyArray_DIMS(
%(ten4)
s))[2]-(c
%%2
))
%%
step_x)!=0))
( (((PyArray_DIMS(
%(ten4)
s))[2]-(c
%%2
))
%%
step_x)!=0))
{
{
...
@@ -485,7 +489,7 @@ class Images2Neibs(Op):
...
@@ -485,7 +489,7 @@ class Images2Neibs(Op):
grid_c = 1+(((PyArray_DIMS(
%(ten4)
s))[2]-(c
%%2
))/step_x);
grid_c = 1+(((PyArray_DIMS(
%(ten4)
s))[2]-(c
%%2
))/step_x);
//number of patch in width
//number of patch in width
grid_d = 1+(((PyArray_DIMS(
%(ten4)
s))[3]-(d
%%2
))/step_y);
grid_d = 1+(((PyArray_DIMS(
%(ten4)
s))[3]-(d
%%2
))/step_y);
}
else if ( "
%(mode)
s" == "full"
) {
}
else if (
%(mode)
s == MODE_FULL
) {
if ( ((PyArray_DIMS(
%(ten4)
s))[2] < c) ||
if ( ((PyArray_DIMS(
%(ten4)
s))[2] < c) ||
( (((PyArray_DIMS(
%(ten4)
s))[2]+c-2)
%%
step_x)!=0))
( (((PyArray_DIMS(
%(ten4)
s))[2]+c-2)
%%
step_x)!=0))
{
{
...
@@ -510,9 +514,9 @@ class Images2Neibs(Op):
...
@@ -510,9 +514,9 @@ class Images2Neibs(Op):
grid_c = 1+(((PyArray_DIMS(
%(ten4)
s))[2]+c-2)/step_x);
grid_c = 1+(((PyArray_DIMS(
%(ten4)
s))[2]+c-2)/step_x);
//number of patch in width
//number of patch in width
grid_d = 1+(((PyArray_DIMS(
%(ten4)
s))[3]+d-2)/step_y);
grid_d = 1+(((PyArray_DIMS(
%(ten4)
s))[3]+d-2)/step_y);
}else {
}
else {
PyErr_Format(PyExc_TypeError,
PyErr_Format(PyExc_TypeError,
"Images2Neibs: unknow mode
'
%(mode)
s'"
);
"Images2Neibs: unknow mode
%%
d",
%(mode)
s
);
%(fail)
s;
%(fail)
s;
}
}
...
@@ -572,13 +576,13 @@ class Images2Neibs(Op):
...
@@ -572,13 +576,13 @@ class Images2Neibs(Op):
for (int i = 0; i < c; i++) // loop over c
for (int i = 0; i < c; i++) // loop over c
{
{
int ten4_2 = i + a * step_x;
int ten4_2 = i + a * step_x;
if (
"
%(mode)
s" == "wrap_centered" )
{
if (
%(mode)
s == MODE_WRAP_CENTERED)
{
ten4_2 -= wrap_centered_half_idx_shift_x;
ten4_2 -= wrap_centered_half_idx_shift_x;
if ( ten4_2 < 0 ) ten4_2 += height;
if ( ten4_2 < 0 ) ten4_2 += height;
else if (ten4_2 >= height) ten4_2 -= height;
else if (ten4_2 >= height) ten4_2 -= height;
} else if (
"
%(mode)
s" == "half" )
{
} else if (
%(mode)
s == MODE_HALF)
{
ten4_2 -= wrap_centered_half_idx_shift_x;
ten4_2 -= wrap_centered_half_idx_shift_x;
} else if (
"
%(mode)
s" == "full" )
{
} else if (
%(mode)
s == MODE_FULL)
{
ten4_2 -= c - 1;
ten4_2 -= c - 1;
}
}
if (ten4_2 < 0 | ten4_2 >= height) {
if (ten4_2 < 0 | ten4_2 >= height) {
...
@@ -588,13 +592,13 @@ class Images2Neibs(Op):
...
@@ -588,13 +592,13 @@ class Images2Neibs(Op):
for (int j = 0; j < d; j++) // loop over d
for (int j = 0; j < d; j++) // loop over d
{
{
int ten4_3 = j + b * step_y;
int ten4_3 = j + b * step_y;
if (
"
%(mode)
s" == "wrap_centered" )
{
if (
%(mode)
s == MODE_WRAP_CENTERED)
{
ten4_3 -= wrap_centered_half_idx_shift_y;
ten4_3 -= wrap_centered_half_idx_shift_y;
if ( ten4_3 < 0 ) ten4_3 += width;
if ( ten4_3 < 0 ) ten4_3 += width;
else if (ten4_3 >= width) ten4_3 -= width;
else if (ten4_3 >= width) ten4_3 -= width;
} else if (
"
%(mode)
s" == "half" )
{
} else if (
%(mode)
s == MODE_HALF)
{
ten4_3 -= wrap_centered_half_idx_shift_y;
ten4_3 -= wrap_centered_half_idx_shift_y;
} else if (
"
%(mode)
s" == "full" )
{
} else if (
%(mode)
s == MODE_FULL)
{
ten4_3 -= d - 1;
ten4_3 -= d - 1;
}
}
int z_col = j + d * i;
int z_col = j + d * i;
...
@@ -609,7 +613,8 @@ class Images2Neibs(Op):
...
@@ -609,7 +613,8 @@ class Images2Neibs(Op):
}
}
}
}
} // END NESTED SCOPE
} // END NESTED SCOPE
"""
%
locals
()
"""
%
dict
(
ten4
=
inp
[
0
],
neib_shape
=
inp
[
1
],
neib_step
=
inp
[
2
],
z
=
out
[
0
],
fail
=
sub
[
'fail'
],
mode
=
sub
[
'params'
])
def
images2neibs
(
ten4
,
neib_shape
,
neib_step
=
None
,
mode
=
'valid'
):
def
images2neibs
(
ten4
,
neib_shape
,
neib_step
=
None
,
mode
=
'valid'
):
...
...
theano/tensor/subtensor.py
浏览文件 @
5df0cfd8
...
@@ -12,7 +12,7 @@ import theano
...
@@ -12,7 +12,7 @@ import theano
from
theano.compat
import
izip
from
theano.compat
import
izip
from
theano.gradient
import
DisconnectedType
from
theano.gradient
import
DisconnectedType
from
theano
import
gof
from
theano
import
gof
from
theano.gof
import
Apply
,
hashtype
,
Op
,
Type
,
MethodNotDefined
from
theano.gof
import
Apply
,
hashtype
,
Op
,
Type
,
MethodNotDefined
,
ParamsType
from
theano.printing
import
pprint
from
theano.printing
import
pprint
from
theano
import
scalar
as
scal
from
theano
import
scalar
as
scal
from
theano.tensor.basic
import
alloc
from
theano.tensor.basic
import
alloc
...
@@ -1685,6 +1685,7 @@ class AdvancedSubtensor1(Op):
...
@@ -1685,6 +1685,7 @@ class AdvancedSubtensor1(Op):
# of the grad() method.
# of the grad() method.
__props__
=
()
__props__
=
()
_f16_ok
=
True
_f16_ok
=
True
check_input
=
False
def
__init__
(
self
,
sparse_grad
=
False
):
def
__init__
(
self
,
sparse_grad
=
False
):
self
.
sparse_grad
=
sparse_grad
self
.
sparse_grad
=
sparse_grad
...
@@ -1872,10 +1873,13 @@ class AdvancedIncSubtensor1(Op):
...
@@ -1872,10 +1873,13 @@ class AdvancedIncSubtensor1(Op):
"""
"""
__props__
=
(
'inplace'
,
'set_instead_of_inc'
)
__props__
=
(
'inplace'
,
'set_instead_of_inc'
)
check_input
=
False
params_type
=
ParamsType
(
inplace
=
scal
.
bool
,
set_instead_of_inc
=
scal
.
bool
)
def
__init__
(
self
,
inplace
=
False
,
set_instead_of_inc
=
False
):
def
__init__
(
self
,
inplace
=
False
,
set_instead_of_inc
=
False
):
self
.
inplace
=
inplace
self
.
inplace
=
bool
(
inplace
)
self
.
set_instead_of_inc
=
set_instead_of_inc
self
.
set_instead_of_inc
=
bool
(
set_instead_of_inc
)
if
inplace
:
if
inplace
:
self
.
destroy_map
=
{
0
:
[
0
]}
self
.
destroy_map
=
{
0
:
[
0
]}
...
@@ -1955,17 +1959,11 @@ class AdvancedIncSubtensor1(Op):
...
@@ -1955,17 +1959,11 @@ class AdvancedIncSubtensor1(Op):
raise
NotImplementedError
raise
NotImplementedError
x
,
y
,
idx
=
input_names
x
,
y
,
idx
=
input_names
out
=
output_names
[
0
]
out
=
output_names
[
0
]
fail
=
sub
[
'fail'
]
inc_or_set
=
1
-
self
.
set_instead_of_inc
if
self
.
inplace
:
# convert bool to int
inplace
=
1
else
:
inplace
=
0
copy_of_x
=
self
.
copy_of_x
(
x
)
copy_of_x
=
self
.
copy_of_x
(
x
)
return
"""
return
"""
PyObject* rval = NULL;
PyObject* rval = NULL;
if (
%(
inplace)
s
)
if (
%(
params)
s->inplace
)
{
{
if (
%(x)
s !=
%(out)
s)
if (
%(x)
s !=
%(out)
s)
{
{
...
@@ -1983,16 +1981,17 @@ class AdvancedIncSubtensor1(Op):
...
@@ -1983,16 +1981,17 @@ class AdvancedIncSubtensor1(Op):
%(fail)
s
%(fail)
s
}
}
}
}
if (inplace_increment(
%(out)
s, (PyObject *)
%(idx)
s,
%(y)
s,
%(inc_or_set)
d
)) {
if (inplace_increment(
%(out)
s, (PyObject *)
%(idx)
s,
%(y)
s,
(1 -
%(params)
s->set_instead_of_inc)
)) {
%(fail)
s;
%(fail)
s;
}
}
Py_XDECREF(rval);
Py_XDECREF(rval);
"""
%
locals
()
"""
%
dict
(
x
=
x
,
y
=
y
,
idx
=
idx
,
out
=
out
,
copy_of_x
=
copy_of_x
,
params
=
sub
[
'params'
],
fail
=
sub
[
'fail'
])
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
6
,)
return
(
8
,)
def
perform
(
self
,
node
,
inp
,
out_
):
def
perform
(
self
,
node
,
inp
,
out_
,
params
):
# TODO opt to make this inplace
# TODO opt to make this inplace
x
,
y
,
idx
=
inp
x
,
y
,
idx
=
inp
out
,
=
out_
out
,
=
out_
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论