Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
388f057b
提交
388f057b
authored
8月 03, 2017
作者:
João Victor Risso
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Remove spatial transformer descriptor from grid and sampler Ops
Signed-off-by:
João Victor Risso
<
joaovictor.risso@gmail.com
>
上级
ed89dc9e
显示空白字符变更
内嵌
并排
正在显示
4 个修改的文件
包含
125 行增加
和
180 行删除
+125
-180
dnn_sptf_desc.c
theano/gpuarray/c_code/dnn_sptf_desc.c
+0
-46
dnn_sptf_grid.c
theano/gpuarray/c_code/dnn_sptf_grid.c
+58
-8
dnn_sptf_sampler.c
theano/gpuarray/c_code/dnn_sptf_sampler.c
+40
-4
dnn.py
theano/gpuarray/dnn.py
+27
-122
没有找到文件。
theano/gpuarray/c_code/dnn_sptf_desc.c
deleted
100644 → 0
浏览文件 @
ed89dc9e
#section support_code_apply
int
APPLY_SPECIFIC
(
dnn_sptf_desc
)(
PyArrayObject
*
out_dims
,
cudnnSpatialTransformerDescriptor_t
*
desc
,
PARAMS_TYPE
*
params
)
{
cudnnStatus_t
err
;
const
int
nimages
=
(
int
)
*
((
npy_int64
*
)
PyArray_GETPTR1
(
out_dims
,
0
));
const
int
nchannels
=
(
int
)
*
((
npy_int64
*
)
PyArray_GETPTR1
(
out_dims
,
1
));
const
int
height
=
(
int
)
*
((
npy_int64
*
)
PyArray_GETPTR1
(
out_dims
,
2
));
const
int
width
=
(
int
)
*
((
npy_int64
*
)
PyArray_GETPTR1
(
out_dims
,
3
));
if
(
nimages
==
0
||
nchannels
==
0
||
height
==
0
||
width
==
0
)
{
PyErr_SetString
(
PyExc_RuntimeError
,
"GpuDnnTransformerDesc: invalid grid dimensions"
);
return
1
;
}
// num_images, num_channels, height, width
const
int
out_tensor_dims
[
4
]
=
{
nimages
,
nchannels
,
height
,
width
};
err
=
cudnnCreateSpatialTransformerDescriptor
(
desc
);
if
(
CUDNN_STATUS_SUCCESS
!=
err
)
{
PyErr_Format
(
PyExc_MemoryError
,
"GpuDnnTransformerDesc: could not allocate descriptor: %s"
,
cudnnGetErrorString
(
err
)
);
return
1
;
}
// Currently, only the bilinear sampler is supported by cuDNN,
// so it is not available as a parameter
err
=
cudnnSetSpatialTransformerNdDescriptor
(
*
desc
,
CUDNN_SAMPLER_BILINEAR
,
params
->
precision
,
4
,
out_tensor_dims
);
if
(
CUDNN_STATUS_SUCCESS
!=
err
)
{
PyErr_Format
(
PyExc_MemoryError
,
"GpuDnnTransformerDesc: could not initialize descriptor: %s"
,
cudnnGetErrorString
(
err
)
);
return
1
;
}
return
0
;
}
theano/gpuarray/c_code/dnn_sptf_grid.c
浏览文件 @
388f057b
#section support_code_struct
#section support_code_struct
cudnnSpatialTransformerDescriptor_t
APPLY_SPECIFIC
(
sptf
);
#section init_code_struct
cudnnStatus_t
APPLY_SPECIFIC
(
err
)
=
CUDNN_STATUS_SUCCESS
;
APPLY_SPECIFIC
(
sptf
)
=
NULL
;
if
((
APPLY_SPECIFIC
(
err
)
=
cudnnCreateSpatialTransformerDescriptor
(
&
APPLY_SPECIFIC
(
sptf
)))
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_MemoryError
,
"GpuDnnTransformerGrid: could not allocate spatial transformer descriptor (sptf): %s"
,
cudnnGetErrorString
(
APPLY_SPECIFIC
(
err
)));
FAIL
;
}
#section cleanup_code_struct
if
(
APPLY_SPECIFIC
(
sptf
)
!=
NULL
)
{
cudnnDestroySpatialTransformerDescriptor
(
APPLY_SPECIFIC
(
sptf
));
}
#section support_code_struct
int
int
APPLY_SPECIFIC
(
dnn_sptf_grid
)(
PyGpuArrayObject
*
theta
,
APPLY_SPECIFIC
(
dnn_sptf_grid
)(
PyGpuArrayObject
*
theta
,
PyArrayObject
*
out_dims
,
PyArrayObject
*
out_dims
,
cudnnSpatialTransformerDescriptor_t
desc
,
PyGpuArrayObject
**
grid
,
PyGpuArrayObject
**
grid
,
cudnnHandle_t
_handle
)
cudnnHandle_t
_handle
)
{
{
PyGpuContextObject
*
gpu_ctx
=
theta
->
context
;
PyGpuContextObject
*
gpu_ctx
=
theta
->
context
;
size_t
grid_dims
[
4
];
size_t
grid_dims
[
4
];
int
num_images
,
num_channels
,
height
,
width
;
int
num_images
,
num_channels
,
height
,
width
;
int
desc_dims
[
4
];
cudnnDataType_t
dt
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
if
(
theta
->
ga
.
typecode
!=
GA_FLOAT
&&
switch
(
theta
->
ga
.
typecode
)
theta
->
ga
.
typecode
!=
GA_DOUBLE
&&
theta
->
ga
.
typecode
!=
GA_HALF
)
{
{
case
GA_DOUBLE
:
dt
=
CUDNN_DATA_DOUBLE
;
break
;
case
GA_FLOAT
:
dt
=
CUDNN_DATA_FLOAT
;
break
;
case
GA_HALF
:
dt
=
CUDNN_DATA_HALF
;
break
;
default
:
PyErr_SetString
(
PyExc_TypeError
,
PyErr_SetString
(
PyExc_TypeError
,
"GpuDnnTransformerGrid: unsupported data type for theta in spatial transformer."
);
"GpuDnnTransformerGrid: unsupported data type for theta in spatial transformer."
);
return
1
;
return
1
;
}
}
else
if
(
PyGpuArray_DIM
(
theta
,
1
)
!=
2
||
PyGpuArray_DIM
(
theta
,
2
)
!=
3
)
if
(
PyGpuArray_DIM
(
theta
,
1
)
!=
2
||
PyGpuArray_DIM
(
theta
,
2
)
!=
3
)
{
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
"GpuDnnTransformerGrid: incorrect dimensions for theta, expected (%d, %d, %d), got (%d, %d, %d)"
,
"GpuDnnTransformerGrid: incorrect dimensions for theta, expected (%d, %d, %d), got (%d, %d, %d)"
,
...
@@ -38,14 +69,33 @@ APPLY_SPECIFIC(dnn_sptf_grid)(PyGpuArrayObject * theta,
...
@@ -38,14 +69,33 @@ APPLY_SPECIFIC(dnn_sptf_grid)(PyGpuArrayObject * theta,
// Obtain output dimensions
// Obtain output dimensions
num_images
=
(
int
)
*
(
(
npy_int64
*
)
PyArray_GETPTR1
(
out_dims
,
0
)
);
num_images
=
(
int
)
*
(
(
npy_int64
*
)
PyArray_GETPTR1
(
out_dims
,
0
)
);
num_channels
=
(
int
)
*
(
(
npy_int64
*
)
PyArray_GETPTR1
(
out_dims
,
1
)
);
height
=
(
int
)
*
(
(
npy_int64
*
)
PyArray_GETPTR1
(
out_dims
,
2
)
);
height
=
(
int
)
*
(
(
npy_int64
*
)
PyArray_GETPTR1
(
out_dims
,
2
)
);
width
=
(
int
)
*
(
(
npy_int64
*
)
PyArray_GETPTR1
(
out_dims
,
3
)
);
width
=
(
int
)
*
(
(
npy_int64
*
)
PyArray_GETPTR1
(
out_dims
,
3
)
);
// Set grid dimensions
// Set transformed output dimensions to setup the descriptor
desc_dims
[
0
]
=
num_images
;
desc_dims
[
1
]
=
num_channels
;
desc_dims
[
2
]
=
height
;
desc_dims
[
3
]
=
width
;
// Set sampling grid dimensions
grid_dims
[
0
]
=
num_images
;
grid_dims
[
0
]
=
num_images
;
grid_dims
[
1
]
=
height
;
grid_dims
[
1
]
=
height
;
grid_dims
[
2
]
=
width
;
grid_dims
[
2
]
=
width
;
grid_dims
[
3
]
=
2
;
grid_dims
[
3
]
=
2
;
// Currently, only the bilinear sampler is supported by cuDNN,
// so the sampler method is currently not available as a parameter
err
=
cudnnSetSpatialTransformerNdDescriptor
(
APPLY_SPECIFIC
(
sptf
),
CUDNN_SAMPLER_BILINEAR
,
dt
,
4
,
desc_dims
);
if
(
CUDNN_STATUS_SUCCESS
!=
err
)
{
PyErr_Format
(
PyExc_MemoryError
,
"GpuDnnTransformerGrid: could not initialize descriptor (sptf): %s"
,
cudnnGetErrorString
(
err
)
);
return
1
;
}
if
(
theano_prep_output
(
grid
,
4
,
grid_dims
,
theta
->
ga
.
typecode
,
if
(
theano_prep_output
(
grid
,
4
,
grid_dims
,
theta
->
ga
.
typecode
,
GA_C_ORDER
,
gpu_ctx
)
!=
0
)
GA_C_ORDER
,
gpu_ctx
)
!=
0
)
{
{
...
@@ -59,8 +109,8 @@ APPLY_SPECIFIC(dnn_sptf_grid)(PyGpuArrayObject * theta,
...
@@ -59,8 +109,8 @@ APPLY_SPECIFIC(dnn_sptf_grid)(PyGpuArrayObject * theta,
cuda_wait
(
theta
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_wait
(
theta
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_wait
(
(
*
grid
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
cuda_wait
(
(
*
grid
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
err
=
cudnnSpatialTfGridGeneratorForward
(
_handle
,
desc
,
PyGpuArray_DEV_DATA
(
theta
),
err
=
cudnnSpatialTfGridGeneratorForward
(
_handle
,
APPLY_SPECIFIC
(
sptf
),
PyGpuArray_DEV_DATA
(
*
grid
)
);
PyGpuArray_DEV_DATA
(
theta
),
PyGpuArray_DEV_DATA
(
*
grid
)
);
cuda_record
(
theta
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_record
(
theta
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_record
(
(
*
grid
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
cuda_record
(
(
*
grid
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
...
...
theano/gpuarray/c_code/dnn_sptf_sampler.c
浏览文件 @
388f057b
#section support_code_struct
#section support_code_struct
cudnnSpatialTransformerDescriptor_t
APPLY_SPECIFIC
(
sptf
);
cudnnTensorDescriptor_t
APPLY_SPECIFIC
(
xdesc
);
cudnnTensorDescriptor_t
APPLY_SPECIFIC
(
xdesc
);
cudnnTensorDescriptor_t
APPLY_SPECIFIC
(
ydesc
);
cudnnTensorDescriptor_t
APPLY_SPECIFIC
(
ydesc
);
#section init_code_struct
#section init_code_struct
APPLY_SPECIFIC
(
sptf
)
=
NULL
;
APPLY_SPECIFIC
(
xdesc
)
=
NULL
;
APPLY_SPECIFIC
(
xdesc
)
=
NULL
;
APPLY_SPECIFIC
(
ydesc
)
=
NULL
;
APPLY_SPECIFIC
(
ydesc
)
=
NULL
;
{
{
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
err
=
cudnnCreateSpatialTransformerDescriptor
(
&
APPLY_SPECIFIC
(
sptf
));
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_MemoryError
,
"GpuDnnTransformerSampler: could not allocate spatial transformer descriptor (sptf): %s"
,
cudnnGetErrorString
(
err
));
FAIL
;
}
err
=
cudnnCreateTensorDescriptor
(
&
APPLY_SPECIFIC
(
xdesc
)
);
err
=
cudnnCreateTensorDescriptor
(
&
APPLY_SPECIFIC
(
xdesc
)
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
{
...
@@ -31,6 +43,9 @@ APPLY_SPECIFIC(ydesc) = NULL;
...
@@ -31,6 +43,9 @@ APPLY_SPECIFIC(ydesc) = NULL;
#section cleanup_code_struct
#section cleanup_code_struct
if
(
APPLY_SPECIFIC
(
sptf
)
!=
NULL
)
cudnnDestroySpatialTransformerDescriptor
(
APPLY_SPECIFIC
(
sptf
));
if
(
APPLY_SPECIFIC
(
xdesc
)
!=
NULL
)
if
(
APPLY_SPECIFIC
(
xdesc
)
!=
NULL
)
cudnnDestroyTensorDescriptor
(
APPLY_SPECIFIC
(
xdesc
)
);
cudnnDestroyTensorDescriptor
(
APPLY_SPECIFIC
(
xdesc
)
);
...
@@ -42,7 +57,6 @@ if ( APPLY_SPECIFIC(ydesc) != NULL )
...
@@ -42,7 +57,6 @@ if ( APPLY_SPECIFIC(ydesc) != NULL )
int
int
APPLY_SPECIFIC
(
dnn_sptf_sampler
)(
PyGpuArrayObject
*
input
,
APPLY_SPECIFIC
(
dnn_sptf_sampler
)(
PyGpuArrayObject
*
input
,
PyGpuArrayObject
*
grid
,
PyGpuArrayObject
*
grid
,
cudnnSpatialTransformerDescriptor_t
desc
,
PyGpuArrayObject
**
output
,
PyGpuArrayObject
**
output
,
cudnnHandle_t
_handle
)
cudnnHandle_t
_handle
)
{
{
...
@@ -52,6 +66,8 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
...
@@ -52,6 +66,8 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
double
alpha
=
1
.
0
,
beta
=
0
.
0
;
double
alpha
=
1
.
0
,
beta
=
0
.
0
;
float
af
=
alpha
,
bf
=
beta
;
float
af
=
alpha
,
bf
=
beta
;
size_t
out_dims
[
4
];
size_t
out_dims
[
4
];
int
desc_dims
[
4
];
cudnnDataType_t
dt
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
switch
(
input
->
ga
.
typecode
)
switch
(
input
->
ga
.
typecode
)
...
@@ -59,14 +75,17 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
...
@@ -59,14 +75,17 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
case
GA_DOUBLE
:
case
GA_DOUBLE
:
alpha_p
=
(
void
*
)
&
alpha
;
alpha_p
=
(
void
*
)
&
alpha
;
beta_p
=
(
void
*
)
&
beta
;
beta_p
=
(
void
*
)
&
beta
;
dt
=
CUDNN_DATA_DOUBLE
;
break
;
break
;
case
GA_FLOAT
:
case
GA_FLOAT
:
alpha_p
=
(
void
*
)
&
af
;
alpha_p
=
(
void
*
)
&
af
;
beta_p
=
(
void
*
)
&
bf
;
beta_p
=
(
void
*
)
&
bf
;
dt
=
CUDNN_DATA_FLOAT
;
break
;
break
;
case
GA_HALF
:
case
GA_HALF
:
alpha_p
=
(
void
*
)
&
af
;
alpha_p
=
(
void
*
)
&
af
;
beta_p
=
(
void
*
)
&
bf
;
beta_p
=
(
void
*
)
&
bf
;
dt
=
CUDNN_DATA_HALF
;
break
;
break
;
default:
default:
PyErr_SetString
(
PyExc_TypeError
,
PyErr_SetString
(
PyExc_TypeError
,
...
@@ -78,6 +97,11 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
...
@@ -78,6 +97,11 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
out_dims
[
1
]
=
(
size_t
)
PyGpuArray_DIM
(
input
,
1
);
// num_channels
out_dims
[
1
]
=
(
size_t
)
PyGpuArray_DIM
(
input
,
1
);
// num_channels
out_dims
[
2
]
=
(
size_t
)
PyGpuArray_DIM
(
grid
,
1
);
// grid height
out_dims
[
2
]
=
(
size_t
)
PyGpuArray_DIM
(
grid
,
1
);
// grid height
out_dims
[
3
]
=
(
size_t
)
PyGpuArray_DIM
(
grid
,
2
);
// grid width
out_dims
[
3
]
=
(
size_t
)
PyGpuArray_DIM
(
grid
,
2
);
// grid width
// Set output dimensions for the descriptor setup
desc_dims
[
0
]
=
(
int
)
out_dims
[
0
];
desc_dims
[
1
]
=
(
int
)
out_dims
[
1
];
desc_dims
[
2
]
=
(
int
)
out_dims
[
2
];
desc_dims
[
3
]
=
(
int
)
out_dims
[
3
];
if
(
out_dims
[
0
]
==
0
||
out_dims
[
1
]
==
0
||
out_dims
[
2
]
==
0
||
out_dims
[
3
]
==
0
)
if
(
out_dims
[
0
]
==
0
||
out_dims
[
1
]
==
0
||
out_dims
[
2
]
==
0
||
out_dims
[
3
]
==
0
)
{
{
...
@@ -94,6 +118,18 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
...
@@ -94,6 +118,18 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
return
1
;
return
1
;
}
}
// Currently, only the bilinear sampler is supported by cuDNN,
// so the sampler method is currently not available as a parameter
err
=
cudnnSetSpatialTransformerNdDescriptor
(
APPLY_SPECIFIC
(
sptf
),
CUDNN_SAMPLER_BILINEAR
,
dt
,
4
,
desc_dims
);
if
(
CUDNN_STATUS_SUCCESS
!=
err
)
{
PyErr_Format
(
PyExc_MemoryError
,
"GpuDnnTransformerSampler: could not initialize descriptor: %s"
,
cudnnGetErrorString
(
err
)
);
return
1
;
}
if
(
c_set_tensorNd
(
input
,
APPLY_SPECIFIC
(
xdesc
)
)
!=
0
)
if
(
c_set_tensorNd
(
input
,
APPLY_SPECIFIC
(
xdesc
)
)
!=
0
)
return
1
;
return
1
;
...
@@ -106,9 +142,9 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
...
@@ -106,9 +142,9 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
cuda_wait
(
grid
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_wait
(
grid
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_wait
(
(
*
output
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
cuda_wait
(
(
*
output
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
err
=
cudnnSpatialTfSamplerForward
(
_handle
,
desc
,
alpha_p
,
APPLY_SPECIFIC
(
xdesc
)
,
err
=
cudnnSpatialTfSamplerForward
(
_handle
,
APPLY_SPECIFIC
(
sptf
),
alpha_p
,
PyGpuArray_DEV_DATA
(
input
),
PyGpuArray_DEV_DATA
(
grid
),
beta_p
,
APPLY_SPECIFIC
(
xdesc
),
PyGpuArray_DEV_DATA
(
input
),
PyGpuArray_DEV_DATA
(
grid
)
,
APPLY_SPECIFIC
(
ydesc
),
PyGpuArray_DEV_DATA
(
*
output
)
);
beta_p
,
APPLY_SPECIFIC
(
ydesc
),
PyGpuArray_DEV_DATA
(
*
output
)
);
cuda_record
(
input
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_record
(
input
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_record
(
grid
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_record
(
grid
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
...
...
theano/gpuarray/dnn.py
浏览文件 @
388f057b
...
@@ -13,6 +13,7 @@ from theano import Op, Apply, tensor, config, Variable
...
@@ -13,6 +13,7 @@ from theano import Op, Apply, tensor, config, Variable
from
theano.scalar
import
(
as_scalar
,
constant
,
Log
,
get_scalar_type
,
from
theano.scalar
import
(
as_scalar
,
constant
,
Log
,
get_scalar_type
,
int32
as
int_t
,
bool
as
bool_t
,
uint32
as
uint32_t
)
int32
as
int_t
,
bool
as
bool_t
,
uint32
as
uint32_t
)
from
theano.tensor
import
as_tensor_variable
,
Argmax
from
theano.tensor
import
as_tensor_variable
,
Argmax
from
theano.tensor.extra_ops
import
cpu_contiguous
from
theano.gradient
import
DisconnectedType
,
grad_not_implemented
from
theano.gradient
import
DisconnectedType
,
grad_not_implemented
from
theano.gof
import
Optimizer
,
local_optimizer
,
COp
,
ParamsType
,
EnumList
from
theano.gof
import
Optimizer
,
local_optimizer
,
COp
,
ParamsType
,
EnumList
from
theano.gof.cmodule
import
GCC_compiler
from
theano.gof.cmodule
import
GCC_compiler
...
@@ -2738,66 +2739,12 @@ def dnn_batch_normalization_test(inputs, gamma, beta, mean, var,
...
@@ -2738,66 +2739,12 @@ def dnn_batch_normalization_test(inputs, gamma, beta, mean, var,
return
result
return
result
class
GpuDnnTransformerDesc
(
COp
):
"""
Descriptor Op for cuDNN Spatial Transformer.
"""
__props__
=
(
'precision'
,)
params_type
=
ParamsType
(
precision
=
cudnn
.
cudnnDataType_t
)
def
c_headers
(
self
):
return
[
'cudnn.h'
,
'cudnn_helper.h'
]
def
c_header_dirs
(
self
):
header_dirs
=
[
os
.
path
.
dirname
(
__file__
)]
if
config
.
dnn
.
include_path
:
header_dirs
+=
[
config
.
dnn
.
include_path
]
return
header_dirs
def
c_libraries
(
self
):
return
[
'cudnn'
]
def
c_lib_dirs
(
self
):
lib_dirs
=
[]
if
config
.
dnn
.
library_path
:
lib_dirs
+=
[
config
.
dnn
.
library_path
]
return
lib_dirs
def
do_constant_folding
(
self
,
node
):
return
False
def
__init__
(
self
,
precision
=
theano
.
config
.
floatX
):
COp
.
__init__
(
self
,
[
"c_code/dnn_sptf_desc.c"
],
"APPLY_SPECIFIC(dnn_sptf_desc)"
)
assert
cudnn
.
cudnnDataType_t
.
has_alias
(
precision
)
self
.
precision
=
precision
def
make_node
(
self
,
out_dims
):
out_dims
=
as_tensor_variable
(
out_dims
)
assert
out_dims
.
dtype
in
theano
.
tensor
.
basic
.
integer_dtypes
assert
out_dims
.
ndim
==
1
out_dims
=
theano
.
tensor
.
basic
.
cast
(
out_dims
,
'int64'
)
node
=
Apply
(
self
,
[
out_dims
],
[
CDataType
(
"cudnnSpatialTransformerDescriptor_t"
,
freefunc
=
"cudnnDestroySpatialTransformerDescriptor"
)()])
# DebugMode cannot compare the values of CDataType variables, so by
# default it returns False all the time. To prevent DebugMode from
# complaining because of the MergeOptimizer, we make this variable
# always compare to True.
out
=
node
.
outputs
[
0
]
out
.
tag
.
values_eq_approx
=
tensor
.
type
.
values_eq_approx_always_true
return
node
def
c_code_cache_version
(
self
):
return
(
super
(
GpuDnnTransformerDesc
,
self
)
.
c_code_cache_version
(),
version
())
class
GpuDnnTransformerGrid
(
DnnBase
):
class
GpuDnnTransformerGrid
(
DnnBase
):
"""
"""
Grid generator Op for cuDNN Spatial Transformer.
Grid generator Op for cuDNN Spatial Transformer.
"""
"""
__props__
=
()
__props__
=
()
_cop_num_inputs
=
3
_cop_num_inputs
=
2
_cop_num_outputs
=
1
_cop_num_outputs
=
1
_f16_ok
=
True
_f16_ok
=
True
check_input
=
False
check_input
=
False
...
@@ -2805,7 +2752,7 @@ class GpuDnnTransformerGrid(DnnBase):
...
@@ -2805,7 +2752,7 @@ class GpuDnnTransformerGrid(DnnBase):
def
__init__
(
self
):
def
__init__
(
self
):
DnnBase
.
__init__
(
self
,
[
"c_code/dnn_sptf_grid.c"
],
"APPLY_SPECIFIC(dnn_sptf_grid)"
)
DnnBase
.
__init__
(
self
,
[
"c_code/dnn_sptf_grid.c"
],
"APPLY_SPECIFIC(dnn_sptf_grid)"
)
def
make_node
(
self
,
theta
,
desc
):
def
make_node
(
self
,
theta
,
out_dims
):
"""
"""
Create a grid generator node for a cuDNN Spatial Transformer
Create a grid generator node for a cuDNN Spatial Transformer
...
@@ -2815,22 +2762,14 @@ class GpuDnnTransformerGrid(DnnBase):
...
@@ -2815,22 +2762,14 @@ class GpuDnnTransformerGrid(DnnBase):
Affine transformation tensor containing one affine transformation
Affine transformation tensor containing one affine transformation
matrix per image. ``theta`` is usually generated by the localization
matrix per image. ``theta`` is usually generated by the localization
network.
network.
desc : GpuDnnTransformerDesc
Spatial transformer descriptor
"""
"""
context_name
=
infer_context_name
(
desc
)
context_name
=
infer_context_name
(
theta
)
if
(
not
isinstance
(
desc
.
type
,
CDataType
)
or
desc
.
type
.
ctype
!=
'cudnnSpatialTransformerDescriptor_t'
):
raise
ValueError
(
'desc must be cudnnSpatialTransformerDescriptor_t'
)
theta
=
gpu_contiguous
(
as_gpuarray_variable
(
theta
,
context_name
))
theta
=
gpu_contiguous
(
as_gpuarray_variable
(
theta
,
context_name
))
assert
theta
.
dtype
in
(
'float16'
,
'float32'
,
'float64'
)
assert
theta
.
dtype
in
(
'float16'
,
'float32'
,
'float64'
)
assert
theta
.
ndim
==
3
assert
theta
.
ndim
==
3
# Setup output dimensions using input from descriptor
out_dims
=
cpu_contiguous
(
as_tensor_variable
(
out_dims
))
out_dims
=
as_tensor_variable
(
desc
.
owner
.
inputs
[
0
])
assert
out_dims
.
dtype
in
theano
.
tensor
.
basic
.
integer_dtypes
assert
out_dims
.
dtype
in
theano
.
tensor
.
basic
.
integer_dtypes
assert
out_dims
.
ndim
==
1
assert
out_dims
.
ndim
==
1
# Ensure 64-bit ints are passed to the C code
# Ensure 64-bit ints are passed to the C code
...
@@ -2839,20 +2778,16 @@ class GpuDnnTransformerGrid(DnnBase):
...
@@ -2839,20 +2778,16 @@ class GpuDnnTransformerGrid(DnnBase):
broadcastable
=
(
theta
.
type
.
ndim
+
1
)
*
(
False
,),
broadcastable
=
(
theta
.
type
.
ndim
+
1
)
*
(
False
,),
context_name
=
context_name
)()
context_name
=
context_name
)()
inputs
=
[
theta
,
out_dims
,
desc
]
inputs
=
[
theta
,
out_dims
]
outputs
=
[
grid
]
outputs
=
[
grid
]
return
Apply
(
self
,
inputs
,
outputs
)
return
Apply
(
self
,
inputs
,
outputs
)
def
grad
(
self
,
inputs
,
grads
):
def
grad
(
self
,
inputs
,
grads
):
theta
,
out_dims
,
desc
=
inputs
theta
,
out_dims
=
inputs
dgrid
=
grads
[
0
]
dgrid
=
grads
[
0
]
dtheta
=
GpuDnnTransformerGradT
()(
dgrid
,
desc
)
dtheta
=
GpuDnnTransformerGradT
()(
dgrid
)
return
[
dtheta
,
grad_not_implemented
(
self
,
1
,
out_dims
),
DisconnectedType
()()]
return
[
dtheta
,
grad_not_implemented
(
self
,
1
,
out_dims
)]
def
connection_pattern
(
self
,
node
):
# not connected to desc
return
[[
1
],
[
1
],
[
0
]]
class
GpuDnnTransformerSampler
(
DnnBase
):
class
GpuDnnTransformerSampler
(
DnnBase
):
...
@@ -2860,7 +2795,7 @@ class GpuDnnTransformerSampler(DnnBase):
...
@@ -2860,7 +2795,7 @@ class GpuDnnTransformerSampler(DnnBase):
Grid sampler Op for cuDNN Spatial Transformer.
Grid sampler Op for cuDNN Spatial Transformer.
"""
"""
__props__
=
()
__props__
=
()
_cop_num_inputs
=
3
_cop_num_inputs
=
2
_cop_num_outputs
=
1
_cop_num_outputs
=
1
_f16_ok
=
True
_f16_ok
=
True
check_input
=
False
check_input
=
False
...
@@ -2868,7 +2803,7 @@ class GpuDnnTransformerSampler(DnnBase):
...
@@ -2868,7 +2803,7 @@ class GpuDnnTransformerSampler(DnnBase):
def
__init__
(
self
):
def
__init__
(
self
):
DnnBase
.
__init__
(
self
,
[
"c_code/dnn_sptf_sampler.c"
],
"APPLY_SPECIFIC(dnn_sptf_sampler)"
)
DnnBase
.
__init__
(
self
,
[
"c_code/dnn_sptf_sampler.c"
],
"APPLY_SPECIFIC(dnn_sptf_sampler)"
)
def
make_node
(
self
,
img
,
grid
,
desc
):
def
make_node
(
self
,
img
,
grid
):
"""
"""
Create a grid sampler node for a cuDNN Spatial Transformer
Create a grid sampler node for a cuDNN Spatial Transformer
...
@@ -2883,15 +2818,8 @@ class GpuDnnTransformerSampler(DnnBase):
...
@@ -2883,15 +2818,8 @@ class GpuDnnTransformerSampler(DnnBase):
grid : GpuDnnTransformerGrid
grid : GpuDnnTransformerGrid
Grid that contains the coordinates of the pixels to be sampled from
Grid that contains the coordinates of the pixels to be sampled from
the inputs images.
the inputs images.
desc : GpuDnnTransformerDesc
Spatial transformer descriptor
"""
"""
context_name
=
infer_context_name
(
desc
)
context_name
=
infer_context_name
(
img
,
grid
)
if
(
not
isinstance
(
desc
.
type
,
CDataType
)
or
desc
.
type
.
ctype
!=
'cudnnSpatialTransformerDescriptor_t'
):
raise
ValueError
(
'desc must be cudnnSpatialTransformerDescriptor_t'
)
img
=
gpu_contiguous
(
as_gpuarray_variable
(
img
,
context_name
))
img
=
gpu_contiguous
(
as_gpuarray_variable
(
img
,
context_name
))
if
img
.
type
.
ndim
!=
4
:
if
img
.
type
.
ndim
!=
4
:
...
@@ -2909,20 +2837,16 @@ class GpuDnnTransformerSampler(DnnBase):
...
@@ -2909,20 +2837,16 @@ class GpuDnnTransformerSampler(DnnBase):
broadcastable
=
img
.
type
.
ndim
*
(
False
,),
broadcastable
=
img
.
type
.
ndim
*
(
False
,),
context_name
=
context_name
)()
context_name
=
context_name
)()
inputs
=
[
img
,
grid
,
desc
]
inputs
=
[
img
,
grid
]
outputs
=
[
out
]
outputs
=
[
out
]
return
Apply
(
self
,
inputs
,
outputs
)
return
Apply
(
self
,
inputs
,
outputs
)
def
grad
(
self
,
inputs
,
grads
):
def
grad
(
self
,
inputs
,
grads
):
img
,
grid
,
desc
=
inputs
img
,
grid
=
inputs
dy
=
grads
[
0
]
dy
=
grads
[
0
]
dimg
,
dgrid
=
GpuDnnTransformerGradI
()(
img
,
grid
,
dy
,
desc
)
dimg
,
dgrid
=
GpuDnnTransformerGradI
()(
img
,
grid
,
dy
)
return
[
dimg
,
dgrid
,
DisconnectedType
()()]
return
[
dimg
,
dgrid
]
def
connection_pattern
(
self
,
node
):
# not connected to desc
return
[[
1
],
[
1
],
[
0
]]
class
GpuDnnTransformerGradI
(
DnnBase
):
class
GpuDnnTransformerGradI
(
DnnBase
):
...
@@ -2930,7 +2854,7 @@ class GpuDnnTransformerGradI(DnnBase):
...
@@ -2930,7 +2854,7 @@ class GpuDnnTransformerGradI(DnnBase):
Gradient of inputs Op for cuDNN Spatial Transformer.
Gradient of inputs Op for cuDNN Spatial Transformer.
"""
"""
__props__
=
()
__props__
=
()
_cop_num_inputs
=
4
_cop_num_inputs
=
3
_cop_num_outputs
=
2
_cop_num_outputs
=
2
_f16_ok
=
True
_f16_ok
=
True
check_input
=
False
check_input
=
False
...
@@ -2938,12 +2862,8 @@ class GpuDnnTransformerGradI(DnnBase):
...
@@ -2938,12 +2862,8 @@ class GpuDnnTransformerGradI(DnnBase):
def
__init__
(
self
):
def
__init__
(
self
):
DnnBase
.
__init__
(
self
,
[
"c_code/dnn_sptf_gi.c"
],
"APPLY_SPECIFIC(dnn_sptf_gi)"
)
DnnBase
.
__init__
(
self
,
[
"c_code/dnn_sptf_gi.c"
],
"APPLY_SPECIFIC(dnn_sptf_gi)"
)
def
make_node
(
self
,
img
,
grid
,
dy
,
desc
):
def
make_node
(
self
,
img
,
grid
,
dy
):
context_name
=
infer_context_name
(
img
,
grid
,
dy
,
desc
)
context_name
=
infer_context_name
(
img
,
grid
,
dy
)
if
(
not
isinstance
(
desc
.
type
,
CDataType
)
or
desc
.
type
.
ctype
!=
'cudnnSpatialTransformerDescriptor_t'
):
raise
ValueError
(
'desc must be cudnnSpatialTransformerDescriptor_t'
)
img
=
as_gpuarray_variable
(
gpu_contiguous
(
img
),
context_name
)
img
=
as_gpuarray_variable
(
gpu_contiguous
(
img
),
context_name
)
if
img
.
ndim
!=
4
:
if
img
.
ndim
!=
4
:
...
@@ -2960,22 +2880,18 @@ class GpuDnnTransformerGradI(DnnBase):
...
@@ -2960,22 +2880,18 @@ class GpuDnnTransformerGradI(DnnBase):
dimg
=
img
.
type
()
dimg
=
img
.
type
()
dgrid
=
grid
.
type
()
dgrid
=
grid
.
type
()
inputs
=
[
img
,
grid
,
dy
,
desc
]
inputs
=
[
img
,
grid
,
dy
]
outputs
=
[
dimg
,
dgrid
]
outputs
=
[
dimg
,
dgrid
]
return
Apply
(
self
,
inputs
,
outputs
)
return
Apply
(
self
,
inputs
,
outputs
)
def
connection_pattern
(
self
,
node
):
# not connected to desc
return
[[
1
,
1
],
[
1
,
1
],
[
1
,
1
],
[
0
,
0
]]
class
GpuDnnTransformerGradT
(
DnnBase
):
class
GpuDnnTransformerGradT
(
DnnBase
):
"""
"""
Gradient of affine transformations Op for cuDNN Spatial Transformer.
Gradient of affine transformations Op for cuDNN Spatial Transformer.
"""
"""
__props__
=
()
__props__
=
()
_cop_num_inputs
=
2
_cop_num_inputs
=
1
_cop_num_outputs
=
1
_cop_num_outputs
=
1
_f16_ok
=
True
_f16_ok
=
True
check_input
=
False
check_input
=
False
...
@@ -2983,12 +2899,8 @@ class GpuDnnTransformerGradT(DnnBase):
...
@@ -2983,12 +2899,8 @@ class GpuDnnTransformerGradT(DnnBase):
def
__init__
(
self
):
def
__init__
(
self
):
DnnBase
.
__init__
(
self
,
[
"c_code/dnn_sptf_gt.c"
],
"APPLY_SPECIFIC(dnn_sptf_gt)"
)
DnnBase
.
__init__
(
self
,
[
"c_code/dnn_sptf_gt.c"
],
"APPLY_SPECIFIC(dnn_sptf_gt)"
)
def
make_node
(
self
,
dgrid
,
desc
):
def
make_node
(
self
,
dgrid
):
context_name
=
infer_context_name
(
desc
)
context_name
=
infer_context_name
(
dgrid
)
if
(
not
isinstance
(
desc
.
type
,
CDataType
)
or
desc
.
type
.
ctype
!=
'cudnnSpatialTransformerDescriptor_t'
):
raise
ValueError
(
'desc must be cudnnSpatialTransformerDescriptor_t'
)
dgrid
=
as_gpuarray_variable
(
dgrid
,
context_name
)
dgrid
=
as_gpuarray_variable
(
dgrid
,
context_name
)
assert
dgrid
.
dtype
in
(
'float16'
,
'float32'
,
'float64'
)
assert
dgrid
.
dtype
in
(
'float16'
,
'float32'
,
'float64'
)
...
@@ -2997,15 +2909,11 @@ class GpuDnnTransformerGradT(DnnBase):
...
@@ -2997,15 +2909,11 @@ class GpuDnnTransformerGradT(DnnBase):
dtheta
=
GpuArrayType
(
dtype
=
dgrid
.
dtype
,
dtheta
=
GpuArrayType
(
dtype
=
dgrid
.
dtype
,
broadcastable
=
(
dgrid
.
type
.
ndim
-
1
)
*
(
False
,),
broadcastable
=
(
dgrid
.
type
.
ndim
-
1
)
*
(
False
,),
context_name
=
context_name
)()
context_name
=
context_name
)()
inputs
=
[
dgrid
,
desc
]
inputs
=
[
dgrid
]
outputs
=
[
dtheta
]
outputs
=
[
dtheta
]
return
Apply
(
self
,
inputs
,
outputs
)
return
Apply
(
self
,
inputs
,
outputs
)
def
connection_pattern
(
self
,
node
):
# not connected to desc
return
[[
1
],
[
0
]]
def
dnn_spatialtf
(
img
,
theta
,
scale_width
=
1
,
scale_height
=
1
,
precision
=
theano
.
config
.
floatX
):
def
dnn_spatialtf
(
img
,
theta
,
scale_width
=
1
,
scale_height
=
1
,
precision
=
theano
.
config
.
floatX
):
"""
"""
...
@@ -3050,10 +2958,7 @@ def dnn_spatialtf(img, theta, scale_width=1, scale_height=1, precision=theano.co
...
@@ -3050,10 +2958,7 @@ def dnn_spatialtf(img, theta, scale_width=1, scale_height=1, precision=theano.co
theano
.
tensor
.
ceil
(
img
.
shape
[
3
]
*
scale_width
))
theano
.
tensor
.
ceil
(
img
.
shape
[
3
]
*
scale_width
))
out_dims
=
tuple
([
as_scalar
(
v
)
.
astype
(
'int64'
)
for
v
in
out_dims
])
out_dims
=
tuple
([
as_scalar
(
v
)
.
astype
(
'int64'
)
for
v
in
out_dims
])
# Create spatial transformer descriptor
context_name
=
infer_context_name
(
img
,
theta
)
desc
=
GpuDnnTransformerDesc
(
precision
)(
out_dims
)
context_name
=
infer_context_name
(
desc
)
img
=
gpu_contiguous
(
as_gpuarray_variable
(
img
,
context_name
))
img
=
gpu_contiguous
(
as_gpuarray_variable
(
img
,
context_name
))
theta
=
gpu_contiguous
(
as_gpuarray_variable
(
theta
,
context_name
))
theta
=
gpu_contiguous
(
as_gpuarray_variable
(
theta
,
context_name
))
...
@@ -3063,8 +2968,8 @@ def dnn_spatialtf(img, theta, scale_width=1, scale_height=1, precision=theano.co
...
@@ -3063,8 +2968,8 @@ def dnn_spatialtf(img, theta, scale_width=1, scale_height=1, precision=theano.co
assert
theta
.
ndim
==
3
assert
theta
.
ndim
==
3
# Setup spatial transformer
# Setup spatial transformer
grid
=
GpuDnnTransformerGrid
()(
theta
,
desc
)
grid
=
GpuDnnTransformerGrid
()(
theta
,
out_dims
)
sampler
=
GpuDnnTransformerSampler
()(
img
,
grid
,
desc
)
sampler
=
GpuDnnTransformerSampler
()(
img
,
grid
)
return
sampler
return
sampler
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论