Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
6d71e317
提交
6d71e317
authored
8月 18, 2017
作者:
Frédéric Bastien
提交者:
GitHub
8月 18, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #6061 from joaovictortr/spatialtf_networks
Spatial Transformer Network Ops using cuDNN
上级
92bf47f1
e754ca98
显示空白字符变更
内嵌
并排
正在显示
7 个修改的文件
包含
1056 行增加
和
0 行删除
+1056
-0
dnn.txt
doc/library/gpuarray/dnn.txt
+2
-0
dnn_sptf_gi.c
theano/gpuarray/c_code/dnn_sptf_gi.c
+194
-0
dnn_sptf_grid.c
theano/gpuarray/c_code/dnn_sptf_grid.c
+131
-0
dnn_sptf_gt.c
theano/gpuarray/c_code/dnn_sptf_gt.c
+110
-0
dnn_sptf_sampler.c
theano/gpuarray/c_code/dnn_sptf_sampler.c
+165
-0
dnn.py
theano/gpuarray/dnn.py
+231
-0
test_dnn.py
theano/gpuarray/tests/test_dnn.py
+223
-0
没有找到文件。
doc/library/gpuarray/dnn.txt
浏览文件 @
6d71e317
...
...
@@ -146,6 +146,8 @@ To get an error if Theano can not use cuDNN, use this Theano flag:
- Softmax:
- You can manually use the op :class:`GpuDnnSoftmax
<theano.gpuarray.dnn.GpuDnnSoftmax>` to use its extra feature.
- Spatial Transformer:
- :func:`theano.gpuarray.dnn.dnn_spatialtf`.
List of Implemented Operations
==============================
...
...
theano/gpuarray/c_code/dnn_sptf_gi.c
0 → 100644
浏览文件 @
6d71e317
#section support_code_struct
cudnnSpatialTransformerDescriptor_t
APPLY_SPECIFIC
(
sptf
);
cudnnTensorDescriptor_t
APPLY_SPECIFIC
(
xdesc
);
cudnnTensorDescriptor_t
APPLY_SPECIFIC
(
dxdesc
);
cudnnTensorDescriptor_t
APPLY_SPECIFIC
(
dydesc
);
#section init_code_struct
APPLY_SPECIFIC
(
sptf
)
=
NULL
;
APPLY_SPECIFIC
(
xdesc
)
=
NULL
;
APPLY_SPECIFIC
(
dxdesc
)
=
NULL
;
APPLY_SPECIFIC
(
dydesc
)
=
NULL
;
{
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
err
=
cudnnCreateSpatialTransformerDescriptor
(
&
APPLY_SPECIFIC
(
sptf
));
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_MemoryError
,
"GpuDnnTransformerGradI: could not allocate spatial transformer descriptor (sptf): %s"
,
cudnnGetErrorString
(
err
));
FAIL
;
}
err
=
cudnnCreateTensorDescriptor
(
&
APPLY_SPECIFIC
(
xdesc
)
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_MemoryError
,
"GpuDnnTransformerGradI: failed to allocate cuDNN tensor descriptor xdesc: %s"
,
cudnnGetErrorString
(
err
)
);
FAIL
;
}
err
=
cudnnCreateTensorDescriptor
(
&
APPLY_SPECIFIC
(
dxdesc
)
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_MemoryError
,
"GpuDnnTransformerGradI: failed to allocate cuDNN tensor descriptor dxdesc: %s"
,
cudnnGetErrorString
(
err
)
);
FAIL
;
}
err
=
cudnnCreateTensorDescriptor
(
&
APPLY_SPECIFIC
(
dydesc
)
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_MemoryError
,
"GpuDnnTransformerGradI: failed to allocate cuDNN tensor descriptor dydesc: %s"
,
cudnnGetErrorString
(
err
)
);
FAIL
;
}
}
#section cleanup_code_struct
if
(
APPLY_SPECIFIC
(
sptf
)
!=
NULL
)
cudnnDestroySpatialTransformerDescriptor
(
APPLY_SPECIFIC
(
sptf
)
);
if
(
APPLY_SPECIFIC
(
xdesc
)
!=
NULL
)
cudnnDestroyTensorDescriptor
(
APPLY_SPECIFIC
(
xdesc
)
);
if
(
APPLY_SPECIFIC
(
dxdesc
)
!=
NULL
)
cudnnDestroyTensorDescriptor
(
APPLY_SPECIFIC
(
dxdesc
)
);
if
(
APPLY_SPECIFIC
(
dydesc
)
!=
NULL
)
cudnnDestroyTensorDescriptor
(
APPLY_SPECIFIC
(
dydesc
)
);
#section support_code_struct
int
APPLY_SPECIFIC
(
dnn_sptf_gi
)(
PyGpuArrayObject
*
input
,
PyGpuArrayObject
*
grid
,
PyGpuArrayObject
*
dy
,
PyGpuArrayObject
**
input_grad
,
PyGpuArrayObject
**
grid_grad
,
cudnnHandle_t
_handle
)
{
PyGpuContextObject
*
gpu_ctx
=
input
->
context
;
void
*
alpha_p
;
void
*
beta_p
;
double
alpha
=
1
.
0
,
beta
=
0
.
0
;
float
af
=
alpha
,
bf
=
beta
;
int
out_dims
[
4
];
cudnnDataType_t
dt
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
switch
(
input
->
ga
.
typecode
)
{
case
GA_DOUBLE
:
alpha_p
=
(
void
*
)
&
alpha
;
beta_p
=
(
void
*
)
&
beta
;
dt
=
CUDNN_DATA_DOUBLE
;
break
;
case
GA_FLOAT
:
alpha_p
=
(
void
*
)
&
af
;
beta_p
=
(
void
*
)
&
bf
;
dt
=
CUDNN_DATA_FLOAT
;
break
;
case
GA_HALF
:
alpha_p
=
(
void
*
)
&
af
;
beta_p
=
(
void
*
)
&
bf
;
dt
=
CUDNN_DATA_HALF
;
break
;
default:
PyErr_SetString
(
PyExc_TypeError
,
"GpuDnnTransformerGradI: unsupported type for input in spatial transformer gradients"
);
return
1
;
}
if
(
grid
->
ga
.
typecode
!=
GA_FLOAT
&&
grid
->
ga
.
typecode
!=
GA_DOUBLE
&&
grid
->
ga
.
typecode
!=
GA_HALF
)
{
PyErr_SetString
(
PyExc_TypeError
,
"GpuDnnTransformerGradI: unsupported data type for grid in spatial transformer gradients."
);
return
1
;
}
if
(
theano_prep_output
(
input_grad
,
PyGpuArray_NDIM
(
input
),
PyGpuArray_DIMS
(
input
),
input
->
ga
.
typecode
,
GA_C_ORDER
,
gpu_ctx
)
!=
0
)
return
1
;
if
(
theano_prep_output
(
grid_grad
,
PyGpuArray_NDIM
(
grid
),
PyGpuArray_DIMS
(
grid
),
grid
->
ga
.
typecode
,
GA_C_ORDER
,
gpu_ctx
)
!=
0
)
return
1
;
// Obtain output dimensions to setup descriptor
out_dims
[
0
]
=
(
int
)
PyGpuArray_DIM
(
input
,
0
);
// num_images
out_dims
[
1
]
=
(
int
)
PyGpuArray_DIM
(
input
,
1
);
// num_channels
out_dims
[
2
]
=
(
int
)
PyGpuArray_DIM
(
grid
,
1
);
// grid height
out_dims
[
3
]
=
(
int
)
PyGpuArray_DIM
(
grid
,
2
);
// grid width
// Currently, only the bilinear sampler is supported by cuDNN,
// so the sampler method is currently not available as a parameter
err
=
cudnnSetSpatialTransformerNdDescriptor
(
APPLY_SPECIFIC
(
sptf
),
CUDNN_SAMPLER_BILINEAR
,
dt
,
4
,
out_dims
);
if
(
CUDNN_STATUS_SUCCESS
!=
err
)
{
PyErr_Format
(
PyExc_MemoryError
,
"GpuDnnTransformerGradI: could not initialize descriptor (sptf): %s"
,
cudnnGetErrorString
(
err
)
);
return
1
;
}
if
(
c_set_tensorNd
(
input
,
APPLY_SPECIFIC
(
xdesc
)
)
!=
0
)
return
1
;
if
(
c_set_tensorNd
(
dy
,
APPLY_SPECIFIC
(
dydesc
)
)
!=
0
)
return
1
;
if
(
c_set_tensorNd
(
*
input_grad
,
APPLY_SPECIFIC
(
dxdesc
)
)
!=
0
)
return
1
;
// Directly return the outputs if any of the dimensions is 0.
// (cuDNN does not support zero-length dimensions.)
if
(
PyGpuArray_SIZE
(
*
input_grad
)
==
0
||
PyGpuArray_SIZE
(
*
grid_grad
)
==
0
)
return
0
;
cuda_enter
(
gpu_ctx
->
ctx
);
cuda_wait
(
input
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_wait
(
grid
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_wait
(
dy
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_wait
(
(
*
input_grad
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
cuda_wait
(
(
*
grid_grad
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
err
=
cudnnSpatialTfSamplerBackward
(
_handle
,
APPLY_SPECIFIC
(
sptf
),
alpha_p
,
APPLY_SPECIFIC
(
xdesc
),
PyGpuArray_DEV_DATA
(
input
),
beta_p
,
APPLY_SPECIFIC
(
dxdesc
),
PyGpuArray_DEV_DATA
(
*
input_grad
),
alpha_p
,
APPLY_SPECIFIC
(
dydesc
),
PyGpuArray_DEV_DATA
(
dy
),
PyGpuArray_DEV_DATA
(
grid
),
beta_p
,
PyGpuArray_DEV_DATA
(
*
grid_grad
)
);
cuda_record
(
input
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_record
(
grid
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_record
(
dy
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_record
(
(
*
input_grad
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
cuda_record
(
(
*
grid_grad
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
cuda_exit
(
gpu_ctx
->
ctx
);
if
(
CUDNN_STATUS_SUCCESS
!=
err
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuDnnTransformerGradI: failed to compute gradients of the inputs: %s"
,
cudnnGetErrorString
(
err
)
);
return
1
;
}
return
0
;
}
theano/gpuarray/c_code/dnn_sptf_grid.c
0 → 100644
浏览文件 @
6d71e317
#section support_code_struct
cudnnSpatialTransformerDescriptor_t
APPLY_SPECIFIC
(
sptf
);
#section init_code_struct
APPLY_SPECIFIC
(
sptf
)
=
NULL
;
{
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
if
((
err
=
cudnnCreateSpatialTransformerDescriptor
(
&
APPLY_SPECIFIC
(
sptf
)))
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_MemoryError
,
"GpuDnnTransformerGrid: could not allocate spatial transformer descriptor (sptf): %s"
,
cudnnGetErrorString
(
err
));
FAIL
;
}
}
#section cleanup_code_struct
if
(
APPLY_SPECIFIC
(
sptf
)
!=
NULL
)
{
cudnnDestroySpatialTransformerDescriptor
(
APPLY_SPECIFIC
(
sptf
));
}
#section support_code_struct
int
APPLY_SPECIFIC
(
dnn_sptf_grid
)(
PyGpuArrayObject
*
theta
,
PyArrayObject
*
out_dims
,
PyGpuArrayObject
**
grid
,
cudnnHandle_t
_handle
)
{
PyGpuContextObject
*
gpu_ctx
=
theta
->
context
;
size_t
grid_dims
[
4
];
int
num_images
,
num_channels
,
height
,
width
;
int
desc_dims
[
4
];
cudnnDataType_t
dt
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
switch
(
theta
->
ga
.
typecode
)
{
case
GA_DOUBLE
:
dt
=
CUDNN_DATA_DOUBLE
;
break
;
case
GA_FLOAT
:
dt
=
CUDNN_DATA_FLOAT
;
break
;
case
GA_HALF
:
dt
=
CUDNN_DATA_HALF
;
break
;
default
:
PyErr_SetString
(
PyExc_TypeError
,
"GpuDnnTransformerGrid: unsupported data type for theta in spatial transformer."
);
return
1
;
}
if
(
PyGpuArray_DIM
(
theta
,
1
)
!=
2
||
PyGpuArray_DIM
(
theta
,
2
)
!=
3
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuDnnTransformerGrid: incorrect dimensions for theta, expected (%d, %d, %d), got (%d, %d, %d)"
,
PyGpuArray_DIMS
(
theta
)[
0
],
2
,
3
,
PyGpuArray_DIMS
(
theta
)[
0
],
PyGpuArray_DIMS
(
theta
)[
1
],
PyGpuArray_DIMS
(
theta
)[
2
]
);
return
1
;
}
if
(
PyArray_NDIM
(
out_dims
)
!=
1
||
PyArray_SIZE
(
out_dims
)
!=
4
)
{
PyErr_SetString
(
PyExc_MemoryError
,
"GpuDnnTransformerGrid: out_dims must have 4 elements."
);
return
1
;
}
// Obtain output dimensions
num_images
=
(
int
)
*
(
(
npy_int64
*
)
PyArray_GETPTR1
(
out_dims
,
0
)
);
num_channels
=
(
int
)
*
(
(
npy_int64
*
)
PyArray_GETPTR1
(
out_dims
,
1
)
);
height
=
(
int
)
*
(
(
npy_int64
*
)
PyArray_GETPTR1
(
out_dims
,
2
)
);
width
=
(
int
)
*
(
(
npy_int64
*
)
PyArray_GETPTR1
(
out_dims
,
3
)
);
// Set transformed output dimensions to setup the descriptor
desc_dims
[
0
]
=
num_images
;
desc_dims
[
1
]
=
num_channels
;
desc_dims
[
2
]
=
height
;
desc_dims
[
3
]
=
width
;
// Set sampling grid dimensions
grid_dims
[
0
]
=
num_images
;
grid_dims
[
1
]
=
height
;
grid_dims
[
2
]
=
width
;
grid_dims
[
3
]
=
2
;
// Currently, only the bilinear sampler is supported by cuDNN,
// so the sampler method is currently not available as a parameter
err
=
cudnnSetSpatialTransformerNdDescriptor
(
APPLY_SPECIFIC
(
sptf
),
CUDNN_SAMPLER_BILINEAR
,
dt
,
4
,
desc_dims
);
if
(
CUDNN_STATUS_SUCCESS
!=
err
)
{
PyErr_Format
(
PyExc_MemoryError
,
"GpuDnnTransformerGrid: could not initialize descriptor (sptf): %s"
,
cudnnGetErrorString
(
err
)
);
return
1
;
}
if
(
theano_prep_output
(
grid
,
4
,
grid_dims
,
theta
->
ga
.
typecode
,
GA_C_ORDER
,
gpu_ctx
)
!=
0
)
{
PyErr_SetString
(
PyExc_RuntimeError
,
"GpuDnnTransformerGrid: could not allocate memory for grid of coordinates"
);
return
1
;
}
cuda_enter
(
gpu_ctx
->
ctx
);
cuda_wait
(
theta
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_wait
(
(
*
grid
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
err
=
cudnnSpatialTfGridGeneratorForward
(
_handle
,
APPLY_SPECIFIC
(
sptf
),
PyGpuArray_DEV_DATA
(
theta
),
PyGpuArray_DEV_DATA
(
*
grid
)
);
cuda_record
(
theta
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_record
(
(
*
grid
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
cuda_exit
(
gpu_ctx
->
ctx
);
if
(
CUDNN_STATUS_SUCCESS
!=
err
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuDnnTransformerGrid: could not create grid of coordinates: %s"
,
cudnnGetErrorString
(
err
)
);
return
1
;
}
return
0
;
}
theano/gpuarray/c_code/dnn_sptf_gt.c
0 → 100644
浏览文件 @
6d71e317
#section support_code_struct
cudnnSpatialTransformerDescriptor_t
APPLY_SPECIFIC
(
sptf
);
#section init_code_struct
APPLY_SPECIFIC
(
sptf
)
=
NULL
;
{
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
if
((
err
=
cudnnCreateSpatialTransformerDescriptor
(
&
APPLY_SPECIFIC
(
sptf
)))
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_MemoryError
,
"GpuDnnTransformerGradT: could not allocate spatial transformer descriptor (sptf): %s"
,
cudnnGetErrorString
(
err
));
FAIL
;
}
}
#section cleanup_code_struct
if
(
APPLY_SPECIFIC
(
sptf
)
!=
NULL
)
cudnnDestroySpatialTransformerDescriptor
(
APPLY_SPECIFIC
(
sptf
));
#section support_code_struct
int
APPLY_SPECIFIC
(
dnn_sptf_gt
)(
PyGpuArrayObject
*
dgrid
,
PyGpuArrayObject
**
dtheta
,
cudnnHandle_t
_handle
)
{
PyGpuContextObject
*
gpu_ctx
=
dgrid
->
context
;
int
num_images
,
height
,
width
;
int
desc_dims
[
4
];
size_t
dtheta_dims
[
3
];
cudnnDataType_t
dt
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
switch
(
dgrid
->
ga
.
typecode
)
{
case
GA_DOUBLE
:
dt
=
CUDNN_DATA_DOUBLE
;
break
;
case
GA_FLOAT
:
dt
=
CUDNN_DATA_FLOAT
;
break
;
case
GA_HALF
:
dt
=
CUDNN_DATA_HALF
;
break
;
default:
PyErr_SetString
(
PyExc_TypeError
,
"GpuDnnTransformerGradT: unsupported data type for dgrid in spatial transformer."
);
return
1
;
}
num_images
=
(
int
)
PyGpuArray_DIM
(
dgrid
,
0
);
height
=
(
int
)
PyGpuArray_DIM
(
dgrid
,
1
);
width
=
(
int
)
PyGpuArray_DIM
(
dgrid
,
2
);
dtheta_dims
[
0
]
=
num_images
;
dtheta_dims
[
1
]
=
2
;
dtheta_dims
[
2
]
=
3
;
if
(
theano_prep_output
(
dtheta
,
3
,
dtheta_dims
,
dgrid
->
ga
.
typecode
,
GA_C_ORDER
,
gpu_ctx
)
!=
0
)
return
1
;
desc_dims
[
0
]
=
num_images
;
// Assume number of channels is 1, because the information is not
// available or relevant here
desc_dims
[
1
]
=
1
;
desc_dims
[
2
]
=
height
;
desc_dims
[
3
]
=
width
;
// Currently, only the bilinear sampler is supported by cuDNN,
// so the sampler method is currently not available as a parameter
err
=
cudnnSetSpatialTransformerNdDescriptor
(
APPLY_SPECIFIC
(
sptf
),
CUDNN_SAMPLER_BILINEAR
,
dt
,
4
,
desc_dims
);
if
(
CUDNN_STATUS_SUCCESS
!=
err
)
{
PyErr_Format
(
PyExc_MemoryError
,
"GpuDnnTransformerGrid: could not initialize descriptor (sptf): %s"
,
cudnnGetErrorString
(
err
)
);
return
1
;
}
cuda_enter
(
gpu_ctx
->
ctx
);
cuda_wait
(
dgrid
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_wait
(
(
*
dtheta
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
err
=
cudnnSpatialTfGridGeneratorBackward
(
_handle
,
APPLY_SPECIFIC
(
sptf
),
PyGpuArray_DEV_DATA
(
dgrid
),
PyGpuArray_DEV_DATA
(
*
dtheta
)
);
cuda_record
(
dgrid
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_record
(
(
*
dtheta
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
cuda_exit
(
gpu_ctx
->
ctx
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuDnnTransformerGradT: could not compute gradients of the affine transformation: %s"
,
cudnnGetErrorString
(
err
)
);
return
1
;
}
return
0
;
}
theano/gpuarray/c_code/dnn_sptf_sampler.c
0 → 100644
浏览文件 @
6d71e317
#section support_code_struct
cudnnSpatialTransformerDescriptor_t
APPLY_SPECIFIC
(
sptf
);
cudnnTensorDescriptor_t
APPLY_SPECIFIC
(
xdesc
);
cudnnTensorDescriptor_t
APPLY_SPECIFIC
(
ydesc
);
#section init_code_struct
APPLY_SPECIFIC
(
sptf
)
=
NULL
;
APPLY_SPECIFIC
(
xdesc
)
=
NULL
;
APPLY_SPECIFIC
(
ydesc
)
=
NULL
;
{
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
err
=
cudnnCreateSpatialTransformerDescriptor
(
&
APPLY_SPECIFIC
(
sptf
));
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_MemoryError
,
"GpuDnnTransformerSampler: could not allocate spatial transformer descriptor (sptf): %s"
,
cudnnGetErrorString
(
err
));
FAIL
;
}
err
=
cudnnCreateTensorDescriptor
(
&
APPLY_SPECIFIC
(
xdesc
)
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_MemoryError
,
"GpuDnnTransformerSampler: failed to allocate cuDNN tensor descriptor xdesc: %s"
,
cudnnGetErrorString
(
err
)
);
FAIL
;
}
err
=
cudnnCreateTensorDescriptor
(
&
APPLY_SPECIFIC
(
ydesc
)
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_MemoryError
,
"GpuDnnTransformerSampler: failed to allocate cuDNN tensor descriptor ydesc: %s"
,
cudnnGetErrorString
(
err
)
);
FAIL
;
}
}
#section cleanup_code_struct
if
(
APPLY_SPECIFIC
(
sptf
)
!=
NULL
)
cudnnDestroySpatialTransformerDescriptor
(
APPLY_SPECIFIC
(
sptf
));
if
(
APPLY_SPECIFIC
(
xdesc
)
!=
NULL
)
cudnnDestroyTensorDescriptor
(
APPLY_SPECIFIC
(
xdesc
)
);
if
(
APPLY_SPECIFIC
(
ydesc
)
!=
NULL
)
cudnnDestroyTensorDescriptor
(
APPLY_SPECIFIC
(
ydesc
)
);
#section support_code_struct
int
APPLY_SPECIFIC
(
dnn_sptf_sampler
)(
PyGpuArrayObject
*
input
,
PyGpuArrayObject
*
grid
,
PyGpuArrayObject
**
output
,
cudnnHandle_t
_handle
)
{
PyGpuContextObject
*
gpu_ctx
=
input
->
context
;
void
*
alpha_p
;
void
*
beta_p
;
double
alpha
=
1
.
0
,
beta
=
0
.
0
;
float
af
=
alpha
,
bf
=
beta
;
size_t
out_dims
[
4
];
int
desc_dims
[
4
];
cudnnDataType_t
dt
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
switch
(
input
->
ga
.
typecode
)
{
case
GA_DOUBLE
:
alpha_p
=
(
void
*
)
&
alpha
;
beta_p
=
(
void
*
)
&
beta
;
dt
=
CUDNN_DATA_DOUBLE
;
break
;
case
GA_FLOAT
:
alpha_p
=
(
void
*
)
&
af
;
beta_p
=
(
void
*
)
&
bf
;
dt
=
CUDNN_DATA_FLOAT
;
break
;
case
GA_HALF
:
alpha_p
=
(
void
*
)
&
af
;
beta_p
=
(
void
*
)
&
bf
;
dt
=
CUDNN_DATA_HALF
;
break
;
default:
PyErr_SetString
(
PyExc_TypeError
,
"GpuDnnTransformer: unsupported type for input in spatial transformer."
);
return
1
;
}
out_dims
[
0
]
=
(
size_t
)
PyGpuArray_DIM
(
input
,
0
);
// num_images
out_dims
[
1
]
=
(
size_t
)
PyGpuArray_DIM
(
input
,
1
);
// num_channels
out_dims
[
2
]
=
(
size_t
)
PyGpuArray_DIM
(
grid
,
1
);
// grid height
out_dims
[
3
]
=
(
size_t
)
PyGpuArray_DIM
(
grid
,
2
);
// grid width
// Set output dimensions for the descriptor setup
desc_dims
[
0
]
=
(
int
)
out_dims
[
0
];
desc_dims
[
1
]
=
(
int
)
out_dims
[
1
];
desc_dims
[
2
]
=
(
int
)
out_dims
[
2
];
desc_dims
[
3
]
=
(
int
)
out_dims
[
3
];
if
(
out_dims
[
0
]
==
0
||
out_dims
[
1
]
==
0
||
out_dims
[
2
]
==
0
||
out_dims
[
3
]
==
0
)
{
PyErr_SetString
(
PyExc_RuntimeError
,
"GpuDnnTransformerSampler: one of the sampler dimensions is zero"
);
return
1
;
}
if
(
theano_prep_output
(
output
,
4
,
out_dims
,
input
->
ga
.
typecode
,
GA_C_ORDER
,
gpu_ctx
)
!=
0
)
{
PyErr_SetString
(
PyExc_MemoryError
,
"GpuDnnTransformerSampler: could not allocate memory for grid sampler"
);
return
1
;
}
// Currently, only the bilinear sampler is supported by cuDNN,
// so the sampler method is currently not available as a parameter
err
=
cudnnSetSpatialTransformerNdDescriptor
(
APPLY_SPECIFIC
(
sptf
),
CUDNN_SAMPLER_BILINEAR
,
dt
,
4
,
desc_dims
);
if
(
CUDNN_STATUS_SUCCESS
!=
err
)
{
PyErr_Format
(
PyExc_MemoryError
,
"GpuDnnTransformerSampler: could not initialize descriptor: %s"
,
cudnnGetErrorString
(
err
)
);
return
1
;
}
if
(
c_set_tensorNd
(
input
,
APPLY_SPECIFIC
(
xdesc
)
)
!=
0
)
return
1
;
if
(
c_set_tensorNd
(
*
output
,
APPLY_SPECIFIC
(
ydesc
)
)
!=
0
)
return
1
;
cuda_enter
(
gpu_ctx
->
ctx
);
cuda_wait
(
input
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_wait
(
grid
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_wait
(
(
*
output
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
err
=
cudnnSpatialTfSamplerForward
(
_handle
,
APPLY_SPECIFIC
(
sptf
),
alpha_p
,
APPLY_SPECIFIC
(
xdesc
),
PyGpuArray_DEV_DATA
(
input
),
PyGpuArray_DEV_DATA
(
grid
),
beta_p
,
APPLY_SPECIFIC
(
ydesc
),
PyGpuArray_DEV_DATA
(
*
output
)
);
cuda_record
(
input
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_record
(
grid
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_record
(
(
*
output
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
cuda_exit
(
gpu_ctx
->
ctx
);
if
(
CUDNN_STATUS_SUCCESS
!=
err
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuDnnTransformerSampler: could not create grid sampler: %s"
,
cudnnGetErrorString
(
err
)
);
return
1
;
}
return
0
;
}
theano/gpuarray/dnn.py
浏览文件 @
6d71e317
...
...
@@ -13,6 +13,7 @@ from theano import Op, Apply, tensor, config, Variable
from
theano.scalar
import
(
as_scalar
,
constant
,
Log
,
get_scalar_type
,
int32
as
int_t
,
bool
as
bool_t
,
uint32
as
uint32_t
)
from
theano.tensor
import
as_tensor_variable
,
Argmax
from
theano.tensor.extra_ops
import
cpu_contiguous
from
theano.gradient
import
DisconnectedType
,
grad_not_implemented
from
theano.gof
import
Optimizer
,
local_optimizer
,
COp
,
ParamsType
,
EnumList
from
theano.gof.cmodule
import
GCC_compiler
...
...
@@ -2782,6 +2783,235 @@ def dnn_batch_normalization_test(inputs, gamma, beta, mean, var,
return
result
class
GpuDnnTransformerGrid
(
DnnBase
):
"""
Grid generator Op for cuDNN Spatial Transformer.
"""
__props__
=
()
_cop_num_inputs
=
2
_cop_num_outputs
=
1
_f16_ok
=
True
check_input
=
False
def
__init__
(
self
):
DnnBase
.
__init__
(
self
,
[
"c_code/dnn_sptf_grid.c"
],
"APPLY_SPECIFIC(dnn_sptf_grid)"
)
def
make_node
(
self
,
theta
,
out_dims
):
"""
Create a grid generator node for a cuDNN Spatial Transformer
Parameters
----------
theta : tensor
Affine transformation tensor containing one affine transformation
matrix per image. ``theta`` is usually generated by the localization
network.
out_dims : tuple
Dimensions of the transformed inputs, containing four elements, and is given
by (N, C, H, W), where N is the number of inputs, C the number of channels,
H and W are the height and width of each input.
"""
context_name
=
infer_context_name
(
theta
)
theta
=
gpu_contiguous
(
as_gpuarray_variable
(
theta
,
context_name
))
assert
theta
.
dtype
in
(
'float16'
,
'float32'
,
'float64'
)
assert
theta
.
ndim
==
3
out_dims
=
cpu_contiguous
(
as_tensor_variable
(
out_dims
))
assert
out_dims
.
dtype
in
theano
.
tensor
.
basic
.
integer_dtypes
assert
out_dims
.
ndim
==
1
# Ensure 64-bit ints are passed to the C code
out_dims
=
theano
.
tensor
.
basic
.
cast
(
out_dims
,
'int64'
)
grid
=
GpuArrayType
(
dtype
=
theta
.
dtype
,
broadcastable
=
(
theta
.
type
.
ndim
+
1
)
*
(
False
,),
context_name
=
context_name
)()
inputs
=
[
theta
,
out_dims
]
outputs
=
[
grid
]
return
Apply
(
self
,
inputs
,
outputs
)
def
grad
(
self
,
inputs
,
grads
):
theta
,
out_dims
=
inputs
dgrid
=
grads
[
0
]
dtheta
=
GpuDnnTransformerGradT
()(
dgrid
)
return
[
dtheta
,
grad_not_implemented
(
self
,
1
,
out_dims
)]
class
GpuDnnTransformerSampler
(
DnnBase
):
"""
Grid sampler Op for cuDNN Spatial Transformer.
"""
__props__
=
()
_cop_num_inputs
=
2
_cop_num_outputs
=
1
_f16_ok
=
True
check_input
=
False
def
__init__
(
self
):
DnnBase
.
__init__
(
self
,
[
"c_code/dnn_sptf_sampler.c"
],
"APPLY_SPECIFIC(dnn_sptf_sampler)"
)
def
make_node
(
self
,
img
,
grid
):
"""
Create a grid sampler node for a cuDNN Spatial Transformer
Parameters
----------
img : tensor
Images from which the pixels will be sampled. The implementation
assumes the tensor is in NCHW format, where N is the number of images,
C is the number of color channels, H is the height of the inputs, and
W is width of the inputs.
grid : GpuDnnTransformerGrid
Grid that contains the coordinates of the pixels to be sampled from
the inputs images.
"""
context_name
=
infer_context_name
(
img
,
grid
)
img
=
gpu_contiguous
(
as_gpuarray_variable
(
img
,
context_name
))
if
img
.
type
.
ndim
!=
4
:
raise
TypeError
(
'img must be a 4D tensor'
)
elif
img
.
dtype
not
in
(
'float16'
,
'float32'
,
'float64'
):
raise
TypeError
(
'img type must be floating-point'
)
grid
=
gpu_contiguous
(
as_gpuarray_variable
(
grid
,
context_name
))
if
grid
.
type
.
ndim
!=
4
:
raise
TypeError
(
'grid must be a 4D tensor'
)
elif
grid
.
dtype
not
in
(
'float16'
,
'float32'
,
'float64'
):
raise
TypeError
(
'grid type must be floating-point'
)
out
=
GpuArrayType
(
dtype
=
img
.
dtype
,
broadcastable
=
img
.
type
.
ndim
*
(
False
,),
context_name
=
context_name
)()
inputs
=
[
img
,
grid
]
outputs
=
[
out
]
return
Apply
(
self
,
inputs
,
outputs
)
def
grad
(
self
,
inputs
,
grads
):
img
,
grid
=
inputs
dy
=
grads
[
0
]
dimg
,
dgrid
=
GpuDnnTransformerGradI
()(
img
,
grid
,
dy
)
return
[
dimg
,
dgrid
]
class
GpuDnnTransformerGradI
(
DnnBase
):
"""
Gradient of inputs Op for cuDNN Spatial Transformer.
"""
__props__
=
()
_cop_num_inputs
=
3
_cop_num_outputs
=
2
_f16_ok
=
True
check_input
=
False
def
__init__
(
self
):
DnnBase
.
__init__
(
self
,
[
"c_code/dnn_sptf_gi.c"
],
"APPLY_SPECIFIC(dnn_sptf_gi)"
)
def
make_node
(
self
,
img
,
grid
,
dy
):
context_name
=
infer_context_name
(
img
,
grid
,
dy
)
img
=
as_gpuarray_variable
(
gpu_contiguous
(
img
),
context_name
)
if
img
.
ndim
!=
4
:
raise
TypeError
(
'img must have 4 dimensions.'
)
grid
=
as_gpuarray_variable
(
gpu_contiguous
(
grid
),
context_name
)
if
img
.
ndim
!=
grid
.
ndim
:
raise
TypeError
(
'grid should have the same number of dimensions as img'
)
dy
=
as_gpuarray_variable
(
dy
,
context_name
)
if
dy
.
ndim
!=
4
:
raise
TypeError
(
'dy must have 4 dimensions.'
)
dimg
=
img
.
type
()
dgrid
=
grid
.
type
()
inputs
=
[
img
,
grid
,
dy
]
outputs
=
[
dimg
,
dgrid
]
return
Apply
(
self
,
inputs
,
outputs
)
class
GpuDnnTransformerGradT
(
DnnBase
):
"""
Gradient of affine transformations Op for cuDNN Spatial Transformer.
"""
__props__
=
()
_cop_num_inputs
=
1
_cop_num_outputs
=
1
_f16_ok
=
True
check_input
=
False
def
__init__
(
self
):
DnnBase
.
__init__
(
self
,
[
"c_code/dnn_sptf_gt.c"
],
"APPLY_SPECIFIC(dnn_sptf_gt)"
)
def
make_node
(
self
,
dgrid
):
context_name
=
infer_context_name
(
dgrid
)
dgrid
=
as_gpuarray_variable
(
dgrid
,
context_name
)
assert
dgrid
.
dtype
in
(
'float16'
,
'float32'
,
'float64'
)
assert
dgrid
.
ndim
==
4
dtheta
=
GpuArrayType
(
dtype
=
dgrid
.
dtype
,
broadcastable
=
(
dgrid
.
type
.
ndim
-
1
)
*
(
False
,),
context_name
=
context_name
)()
inputs
=
[
dgrid
]
outputs
=
[
dtheta
]
return
Apply
(
self
,
inputs
,
outputs
)
def
dnn_spatialtf
(
img
,
theta
,
scale_width
=
1
,
scale_height
=
1
):
"""
GPU spatial transformer using cuDNN from NVIDIA.
Parameters
----------
img : tensor
Images to which the transformations will be applied. The implementation
assumes the tensor is in NCHW format, where N is the number of images,
C is the number of color channels, H is the height of the inputs, and
W is width of the inputs.
theta : tensor
Affine transformation tensor containing one affine transformation
matrix per image. ``theta`` is usually generated by the localization
network.
scale_height: float
A float specifying the scaling factor for the height of the output
image. A value of 1 will keep the original height of the input. Values
larger than 1 will upsample the input. Values below 1 will downsample
the input.
scale_width: float
A float specifying the scaling factor for the width of the output
image. A value of 1 will keep the original width of the input. Values
larger than 1 will upsample the input. Values below 1 will downsample
the input.
Returns
-------
out : tensor
Transformed images with width and height properly scaled.
Notes
-----
Currently, cuDNN only supports 2D transformations with 2x3 affine
transformation matrices.
Bilinear interpolation is the only grid sampler method available.
"""
out_dims
=
(
img
.
shape
[
0
],
img
.
shape
[
1
],
theano
.
tensor
.
ceil
(
img
.
shape
[
2
]
*
scale_height
),
theano
.
tensor
.
ceil
(
img
.
shape
[
3
]
*
scale_width
))
out_dims
=
tuple
([
as_scalar
(
v
)
.
astype
(
'int64'
)
for
v
in
out_dims
])
# Setup spatial transformer
grid
=
GpuDnnTransformerGrid
()(
theta
,
out_dims
)
sampler
=
GpuDnnTransformerSampler
()(
img
,
grid
)
return
sampler
def
local_abstractconv_cudnn_graph
(
op
,
context_name
,
inputs
,
outputs
):
if
(
not
isinstance
(
op
,
(
AbstractConv2d
,
AbstractConv2d_gradWeights
,
...
...
@@ -3137,6 +3367,7 @@ def local_dnn_convgw_inplace(node, inputs):
def
local_dnn_convgi_inplace
(
node
,
inputs
):
return
[
GpuDnnConvGradI
(
algo
=
node
.
op
.
algo
,
inplace
=
True
,
num_groups
=
node
.
op
.
num_groups
)(
*
inputs
)]
optdb
.
register
(
'local_dnna_conv_inplace'
,
tensor
.
opt
.
in2out
(
local_dnn_conv_inplace
,
local_dnn_convgw_inplace
,
...
...
theano/gpuarray/tests/test_dnn.py
浏览文件 @
6d71e317
...
...
@@ -2296,3 +2296,226 @@ class Cudnn_grouped_conv(Grouped_conv_noOptim):
conv_gradi_op
=
dnn
.
GpuDnnConvGradI
flip_filter
=
False
is_dnn
=
True
def
test_dnn_spatialtf
():
if
not
dnn
.
dnn_available
(
test_ctx_name
):
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
utt
.
seed_rng
()
"""
Spatial Transformer implementation using Theano from Lasagne
Original author: skaae (https://github.com/skaae)
"""
def
spatialtf_cpu
(
inp
,
theta
,
scale_height
,
scale_width
,
border_mode
=
'nearest'
):
num_batch
,
num_channels
,
height
,
width
=
inp
.
shape
theta
=
T
.
reshape
(
theta
,
(
-
1
,
2
,
3
))
# grid of (x_t, y_t, 1), eq (1) in ref [1]
out_height
=
T
.
cast
(
T
.
ceil
(
height
*
scale_height
),
'int64'
)
out_width
=
T
.
cast
(
T
.
ceil
(
width
*
scale_width
),
'int64'
)
grid
=
_meshgrid
(
out_height
,
out_width
)
# transform a x (x_t, y_t, 1)^t -> (x_s, y_s)
t_g
=
T
.
dot
(
theta
,
grid
)
x_s
=
t_g
[:,
0
]
y_s
=
t_g
[:,
1
]
x_s_flat
=
x_s
.
flatten
()
y_s_flat
=
y_s
.
flatten
()
# dimshuffle input to (bs, height, width, channels)
input_dim
=
inp
.
dimshuffle
(
0
,
2
,
3
,
1
)
input_transformed
=
_interpolate
(
input_dim
,
x_s_flat
,
y_s_flat
,
out_height
,
out_width
,
border_mode
)
output
=
T
.
reshape
(
input_transformed
,
(
num_batch
,
out_height
,
out_width
,
num_channels
))
output
=
output
.
dimshuffle
(
0
,
3
,
1
,
2
)
# dimshuffle to conv format
return
output
def
_interpolate
(
im
,
x
,
y
,
out_height
,
out_width
,
border_mode
):
# *_f are floats
num_batch
,
height
,
width
,
channels
=
im
.
shape
height_f
=
T
.
cast
(
height
,
theano
.
config
.
floatX
)
width_f
=
T
.
cast
(
width
,
theano
.
config
.
floatX
)
# scale coordinates from [-1, 1] to [0, dimension - 1], where dimension
# can be the width or height
x
=
(
x
+
1
)
/
2
*
(
width_f
-
1
)
y
=
(
y
+
1
)
/
2
*
(
height_f
-
1
)
# obtain indices of the 2x2 pixel neighborhood surrounding the coordinates;
# we need those in floatX for interpolation and in int64 for indexing.
x0_f
=
T
.
floor
(
x
)
y0_f
=
T
.
floor
(
y
)
x1_f
=
x0_f
+
1
y1_f
=
y0_f
+
1
# for indexing, we need to take care of the border mode for outside pixels.
if
border_mode
==
'nearest'
:
x0
=
T
.
clip
(
x0_f
,
0
,
width_f
-
1
)
x1
=
T
.
clip
(
x1_f
,
0
,
width_f
-
1
)
y0
=
T
.
clip
(
y0_f
,
0
,
height_f
-
1
)
y1
=
T
.
clip
(
y1_f
,
0
,
height_f
-
1
)
elif
border_mode
==
'mirror'
:
w
=
2
*
(
width_f
-
1
)
x0
=
T
.
minimum
(
x0_f
%
w
,
-
x0_f
%
w
)
x1
=
T
.
minimum
(
x1_f
%
w
,
-
x1_f
%
w
)
h
=
2
*
(
height_f
-
1
)
y0
=
T
.
minimum
(
y0_f
%
h
,
-
y0_f
%
h
)
y1
=
T
.
minimum
(
y1_f
%
h
,
-
y1_f
%
h
)
elif
border_mode
==
'wrap'
:
x0
=
T
.
mod
(
x0_f
,
width_f
)
x1
=
T
.
mod
(
x1_f
,
width_f
)
y0
=
T
.
mod
(
y0_f
,
height_f
)
y1
=
T
.
mod
(
y1_f
,
height_f
)
else
:
raise
ValueError
(
"border_mode must be one of "
"'nearest', 'mirror', 'wrap'"
)
x0
,
x1
,
y0
,
y1
=
(
T
.
cast
(
v
,
'int64'
)
for
v
in
(
x0
,
x1
,
y0
,
y1
))
# The input is [num_batch, height, width, channels]. We do the lookup in
# the flattened input, i.e [num_batch*height*width, channels]. We need
# to offset all indices to match the flat version
dim2
=
width
dim1
=
width
*
height
base
=
T
.
repeat
(
T
.
arange
(
num_batch
,
dtype
=
'int64'
)
*
dim1
,
out_height
*
out_width
)
base_y0
=
base
+
y0
*
dim2
base_y1
=
base
+
y1
*
dim2
idx_a
=
base_y0
+
x0
idx_b
=
base_y1
+
x0
idx_c
=
base_y0
+
x1
idx_d
=
base_y1
+
x1
# use indices to lookup pixels for all samples
im_flat
=
im
.
reshape
((
-
1
,
channels
))
Ia
=
im_flat
[
idx_a
]
Ib
=
im_flat
[
idx_b
]
Ic
=
im_flat
[
idx_c
]
Id
=
im_flat
[
idx_d
]
# calculate interpolated values
wa
=
((
x1_f
-
x
)
*
(
y1_f
-
y
))
.
dimshuffle
(
0
,
'x'
)
wb
=
((
x1_f
-
x
)
*
(
y
-
y0_f
))
.
dimshuffle
(
0
,
'x'
)
wc
=
((
x
-
x0_f
)
*
(
y1_f
-
y
))
.
dimshuffle
(
0
,
'x'
)
wd
=
((
x
-
x0_f
)
*
(
y
-
y0_f
))
.
dimshuffle
(
0
,
'x'
)
output
=
T
.
sum
([
wa
*
Ia
,
wb
*
Ib
,
wc
*
Ic
,
wd
*
Id
],
axis
=
0
)
return
output
def
_linspace
(
start
,
stop
,
num
):
# Theano linspace. Behaves similar to np.linspace
start
=
T
.
cast
(
start
,
theano
.
config
.
floatX
)
stop
=
T
.
cast
(
stop
,
theano
.
config
.
floatX
)
num
=
T
.
cast
(
num
,
theano
.
config
.
floatX
)
step
=
(
stop
-
start
)
/
(
num
-
1
)
return
T
.
arange
(
num
,
dtype
=
theano
.
config
.
floatX
)
*
step
+
start
def
_meshgrid
(
height
,
width
):
# This function is the grid generator from eq. (1) in reference [1].
# It is equivalent to the following numpy code:
# x_t, y_t = np.meshgrid(np.linspace(-1, 1, width),
# np.linspace(-1, 1, height))
# ones = np.ones(np.prod(x_t.shape))
# grid = np.vstack([x_t.flatten(), y_t.flatten(), ones])
# It is implemented in Theano instead to support symbolic grid sizes.
# Note: If the image size is known at layer construction time, we could
# compute the meshgrid offline in numpy instead of doing it dynamically
# in Theano. However, it hardly affected performance when we tried.
x_t
=
T
.
dot
(
T
.
ones
((
height
,
1
)),
_linspace
(
-
1.0
,
1.0
,
width
)
.
dimshuffle
(
'x'
,
0
))
y_t
=
T
.
dot
(
_linspace
(
-
1.0
,
1.0
,
height
)
.
dimshuffle
(
0
,
'x'
),
T
.
ones
((
1
,
width
)))
x_t_flat
=
x_t
.
reshape
((
1
,
-
1
))
y_t_flat
=
y_t
.
reshape
((
1
,
-
1
))
ones
=
T
.
ones_like
(
x_t_flat
)
grid
=
T
.
concatenate
([
x_t_flat
,
y_t_flat
,
ones
],
axis
=
0
)
return
grid
img_dims
=
(
5
,
3
,
16
,
16
)
img
=
np
.
random
.
random
(
size
=
img_dims
)
.
astype
(
theano
.
config
.
floatX
)
scale_height
=
0.25
scale_width
=
0.75
# Transformation matrix
transform
=
[[
-
1
,
0
,
0
],
[
0
,
-
1
,
0
]]
theta
=
np
.
asarray
(
img_dims
[
0
]
*
[
transform
],
dtype
=
theano
.
config
.
floatX
)
# Create symbolic variables for inputs and transformations
t_img
=
T
.
tensor4
(
'img'
)
t_theta
=
T
.
tensor3
(
'theta'
)
st_dnn
=
dnn
.
dnn_spatialtf
(
t_img
,
t_theta
,
scale_height
=
scale_height
,
scale_width
=
scale_width
)
st_dnn_func
=
theano
.
function
([
t_img
,
t_theta
],
st_dnn
)
# Check if function graph contains the spatial transformer's grid and sampler Ops
apply_nodes
=
st_dnn_func
.
maker
.
fgraph
.
apply_nodes
assert
any
([
isinstance
(
node
.
op
,
dnn
.
GpuDnnTransformerGrid
)
for
node
in
apply_nodes
])
assert
any
([
isinstance
(
node
.
op
,
dnn
.
GpuDnnTransformerSampler
)
for
node
in
apply_nodes
])
img_out_gpu
=
st_dnn_func
(
img
,
theta
)
img_out_gpu
=
np
.
asarray
(
img_out_gpu
)
# Setup CPU Op
st_cpu
=
spatialtf_cpu
(
t_img
,
t_theta
,
scale_height
,
scale_width
,
'nearest'
)
st_cpu_func
=
theano
.
function
([
t_img
,
t_theta
],
st_cpu
,
mode
=
mode_without_gpu
)
img_out_cpu
=
st_cpu_func
(
img
,
theta
)
atol
,
rtol
=
None
,
None
if
theano
.
config
.
floatX
==
'float16'
:
# Raise relative error tolerance when using float16
rtol
=
5e-2
utt
.
assert_allclose
(
img_out_cpu
,
img_out_gpu
,
atol
=
atol
,
rtol
=
rtol
)
def
test_dnn_spatialtf_grad
():
if
not
dnn
.
dnn_available
(
test_ctx_name
):
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
utt
.
seed_rng
()
inputs
=
T
.
tensor4
(
'inputs'
)
theta
=
T
.
tensor3
(
'theta'
)
out
=
dnn
.
dnn_spatialtf
(
inputs
,
theta
,
scale_height
=
0.25
,
scale_width
=
0.75
)
out_mean
=
T
.
mean
(
out
)
mean_gi
=
T
.
grad
(
out_mean
,
[
inputs
])
mean_gt
=
T
.
grad
(
out_mean
,
[
theta
])
f_gi
=
theano
.
function
([
inputs
,
theta
],
mean_gi
)
assert
any
([
isinstance
(
node
.
op
,
dnn
.
GpuDnnTransformerGradI
)
for
node
in
f_gi
.
maker
.
fgraph
.
apply_nodes
])
f_gt
=
theano
.
function
([
inputs
,
theta
],
mean_gt
)
assert
any
([
isinstance
(
node
.
op
,
dnn
.
GpuDnnTransformerGradT
)
for
node
in
f_gt
.
maker
.
fgraph
.
apply_nodes
])
input_dims
=
(
5
,
3
,
16
,
16
)
inputs_val
=
np
.
random
.
random
(
size
=
input_dims
)
.
astype
(
theano
.
config
.
floatX
)
# Tensor with transformations
theta_val
=
np
.
random
.
random
((
input_dims
[
0
],
2
,
3
))
.
astype
(
theano
.
config
.
floatX
)
# Using smaller values for theta, increases the precision of gradients
# when using lower precision. Tests might fail for lower precision data
# types if the values of theta or the inputs are very high.
theta
/=
100
# Check that the gradients are computed
f_gi
(
inputs_val
,
theta_val
)
f_gt
(
inputs_val
,
theta_val
)
def
grad_functor
(
inputs
,
theta
):
out
=
dnn
.
dnn_spatialtf
(
inputs
,
theta
)
return
out
atol
,
rtol
=
None
,
None
if
theano
.
config
.
floatX
==
'float32'
:
rtol
=
5e-2
elif
theano
.
config
.
floatX
==
'float16'
:
rtol
=
1e-0
utt
.
verify_grad
(
grad_functor
,
[
inputs_val
,
theta_val
],
mode
=
mode_with_gpu
,
abs_tol
=
atol
,
rel_tol
=
rtol
)
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论