Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
ae7f287f
提交
ae7f287f
authored
6月 05, 2017
作者:
João Victor Tozatti Risso
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fix pygpu memory allocations and flattening of lengths and labels
Signed-off-by:
João Victor Tozatti Risso
<
joaovictor.risso@gmail.com
>
上级
808c8c64
隐藏空白字符变更
内嵌
并排
正在显示
1 个修改的文件
包含
118 行增加
和
28 行删除
+118
-28
ctc_wrapper.c
theano/gpuarray/ctc_wrapper.c
+118
-28
没有找到文件。
theano/gpuarray/ctc_wrapper.c
浏览文件 @
ae7f287f
#section kernels
#section support_code
#section support_code
typedef
struct
ctc_context
{
typedef
struct
ctc_context
{
...
@@ -23,7 +25,7 @@ void ctc_context_init(ctc_context_t * context)
...
@@ -23,7 +25,7 @@ void ctc_context_init(ctc_context_t * context)
void
ctc_context_destroy
(
ctc_context_t
*
context
)
void
ctc_context_destroy
(
ctc_context_t
*
context
)
{
{
if
(
NULL
!=
context
->
workspace
)
if
(
NULL
!=
context
->
workspace
)
f
ree
(
context
->
workspace
);
cudaF
ree
(
context
->
workspace
);
if
(
NULL
!=
context
->
input_lengths
)
if
(
NULL
!=
context
->
input_lengths
)
free
(
context
->
input_lengths
);
free
(
context
->
input_lengths
);
...
@@ -51,11 +53,62 @@ int ctc_check_result(ctcStatus_t retcode, const char * msg)
...
@@ -51,11 +53,62 @@ int ctc_check_result(ctcStatus_t retcode, const char * msg)
return
0
;
return
0
;
}
}
#section support_code_struct
/*
 * Copy the entries of a one-dimensional NumPy array into a newly malloc()'ed
 * buffer of C ints.
 *
 * input_lengths_arr: source array; its first dimension gives the number of
 *                    elements to copy. Every element is read through an
 *                    npy_int pointer (assumes the array dtype matches
 *                    npy_int -- TODO confirm against the calling Op).
 * input_lengths:     out-parameter. On success it points to the new buffer,
 *                    which was obtained from malloc(). On allocation failure
 *                    it is set to NULL and nothing is copied.
 */
void create_contiguous_input_lengths( PyArrayObject * input_lengths_arr,
    int ** input_lengths )
{
    npy_int num_elements = PyArray_DIMS( input_lengths_arr )[0];

    /* malloc(0) is allowed to return NULL, which would be indistinguishable
       from an out-of-memory condition; always request at least one element
       so that a NULL result really means the allocation failed. */
    const size_t alloc_elements = ( num_elements > 0 ) ? (size_t) num_elements : 1;

    *input_lengths = (int *) malloc( alloc_elements * sizeof(int) );

    if ( NULL == (*input_lengths) )
        return;

    for ( npy_int elem_idx = 0; elem_idx < num_elements; ++elem_idx )
    {
        (*input_lengths)[elem_idx] =
            *( (npy_int *) PyArray_GETPTR1( input_lengths_arr, elem_idx ) );
    }
}
/*
 * Flatten a two-dimensional label matrix into one contiguous array of labels
 * plus a per-row array of label counts.
 *
 * Rows are visited in order; within a row, every non-negative entry is
 * appended to the flat array and counted towards that row's length. Negative
 * entries are assumed to be padding and are skipped.
 *
 * label_matrix:  source matrix; dimension 0 is the number of rows and
 *                dimension 1 the number of columns. Every element is read
 *                through an npy_int pointer (assumes the array dtype matches
 *                npy_int -- TODO confirm against the calling Op).
 * flat_labels:   out-parameter. On success it points to a malloc()'ed buffer
 *                with room for rows * cols ints, of which only the leading
 *                sum(label_lengths) entries are written. Set to NULL when an
 *                allocation fails or the requested size is not representable.
 * label_lengths: out-parameter. On success it points to a malloc()'ed buffer
 *                holding one count per row. Set to NULL if its own allocation
 *                fails; left untouched if the flat_labels allocation fails
 *                first.
 */
void create_flat_labels( PyArrayObject * label_matrix, int ** flat_labels,
    int ** label_lengths )
{
    npy_int rows = PyArray_DIMS( label_matrix )[0];
    npy_int cols = PyArray_DIMS( label_matrix )[1];

    /* Do all size arithmetic in size_t: multiplying the two npy_int values
       directly could overflow a signed int before the conversion happens. */
    const size_t num_rows = ( rows > 0 ) ? (size_t) rows : 0;
    const size_t num_cols = ( cols > 0 ) ? (size_t) cols : 0;
    const size_t max_elements = ( (size_t) -1 ) / sizeof(int);

    /* Refuse sizes whose byte count cannot be represented in size_t. */
    if ( num_rows > max_elements ||
         ( num_cols != 0 && num_rows > max_elements / num_cols ) )
    {
        *flat_labels = NULL;
        return;
    }

    /* malloc(0) may return NULL, which would look like an out-of-memory
       failure; always request at least one element. */
    size_t flat_capacity = num_rows * num_cols;
    if ( 0 == flat_capacity )
        flat_capacity = 1;

    *flat_labels = (int *) malloc( flat_capacity * sizeof(int) );
    if ( NULL == (*flat_labels) )
        return;

    *label_lengths = (int *) malloc( ( num_rows > 0 ? num_rows : 1 ) * sizeof(int) );
    if ( NULL == (*label_lengths) )
    {
        /* Do not leave a half-initialised pair behind. */
        free( *flat_labels );
        *flat_labels = NULL;
        return;
    }

    /* Next free slot in the flat label array. */
    size_t label_index = 0;

    for ( npy_int row_idx = 0; row_idx < rows; ++row_idx )
    {
        npy_int label_length = 0;

        for ( npy_int col_idx = 0; col_idx < cols; ++col_idx )
        {
            npy_int label =
                *( (npy_int *) PyArray_GETPTR2( label_matrix, row_idx, col_idx ) );

            if ( label >= 0 )  // negative values are assumed to be padding
            {
                (*flat_labels)[label_index++] = label;
                ++label_length;
            }
        }

        (*label_lengths)[row_idx] = label_length;
    }
}
#section support_code_apply
int
APPLY_SPECIFIC
(
ctc_cost_gpu
)(
PyGpuArrayObject
*
in_activations
,
int
APPLY_SPECIFIC
(
ctc_cost_gpu
)(
PyGpuArrayObject
*
in_activations
,
Py
GpuArrayObject
*
in_labels
,
Py
ArrayObject
*
in_labels
,
Py
GpuArrayObject
*
in_input_lengths
,
Py
ArrayObject
*
in_input_lengths
,
PyGpuArrayObject
**
out_costs
,
PyGpuArrayObject
**
out_costs
,
PyGpuArrayObject
**
out_gradients
,
PyGpuArrayObject
**
out_gradients
,
PyGpuContextObject
*
ctx
)
PyGpuContextObject
*
ctx
)
...
@@ -64,34 +117,44 @@ int APPLY_SPECIFIC(ctc_cost_gpu)(PyGpuArrayObject * in_activations,
...
@@ -64,34 +117,44 @@ int APPLY_SPECIFIC(ctc_cost_gpu)(PyGpuArrayObject * in_activations,
ctc_context_t
*
context
=
&
ctc_object
;
ctc_context_t
*
context
=
&
ctc_object
;
ctc_context_init
(
context
);
ctc_context_init
(
context
);
if
(
!
PyArray_IS_C_CONTIGUOUS
(
in_activations
)
)
npy_float32
*
activations
=
(
npy_float32
*
)
PyGpuArray_DEV_DATA
(
in_activations
);
create_contiguous_input_lengths
(
in_input_lengths
,
&
(
context
->
input_lengths
)
);
if
(
NULL
==
context
->
input_lengths
)
{
{
PyErr_
SetString
(
PyExc_Runtime
Error
,
PyErr_
Format
(
PyExc_Memory
Error
,
"
activations array must be C-contiguous.
"
);
"
Could not allocate storage for input lengths
"
);
return
1
;
return
1
;
}
}
npy_float32
*
activations
=
(
npy_float32
*
)
PyArray_DATA
(
in_activations
);
// flatten labels to conform with library memory layout
create_flat_labels
(
in_labels
,
&
(
context
->
flat_labels
),
&
(
context
->
label_lengths
)
);
// TODO: flatten input_lengths to conform with underlying library memory layout
// TODO: flatten labels to conform with underlying library memory layout
if
(
(
NULL
==
context
->
label_lengths
)
||
(
NULL
==
context
->
flat_labels
)
)
{
// Destroy previous CTC context before returning exception
ctc_context_destroy
(
context
);
PyErr_Format
(
PyExc_MemoryError
,
"Could not allocate storage for labels and their lengths"
);
return
1
;
}
const
npy_int
minibatch_size
=
Py
Array_DIMS
(
in_activations
)[
1
];
const
size_t
minibatch_size
=
PyGpu
Array_DIMS
(
in_activations
)[
1
];
const
npy_int
alphabet_size
=
Py
Array_DIMS
(
in_activations
)[
2
];
const
size_t
alphabet_size
=
PyGpu
Array_DIMS
(
in_activations
)[
2
];
npy_float32
*
costs
=
NULL
;
npy_float32
*
costs
=
NULL
;
const
npy_intp
cost_size
=
minibatch_size
;
const
size_t
cost_size
=
minibatch_size
;
if
(
NULL
==
*
out_costs
||
// symbolic variable has no real backing
if
(
NULL
==
*
out_costs
||
// symbolic variable has no real backing
PyArray_NDIM
(
*
out_costs
)
!=
1
||
Py
Gpu
Array_NDIM
(
*
out_costs
)
!=
1
||
PyArray_DIMS
(
*
out_costs
)[
0
]
!=
cost_size
)
Py
Gpu
Array_DIMS
(
*
out_costs
)[
0
]
!=
cost_size
)
{
{
P
Y
_XDECREF
(
*
out_costs
);
P
y
_XDECREF
(
*
out_costs
);
*
out_costs
=
pygpu_zeros
(
1
,
cost_size
,
GA_FLOAT
,
GA_C_ORDER
,
*
out_costs
=
pygpu_zeros
(
1
,
&
cost_size
,
GA_FLOAT
,
GA_C_ORDER
,
ctx
,
Py_None
);
ctx
,
Py_None
);
if
(
NULL
==
*
out_costs
)
if
(
NULL
==
*
out_costs
)
{
{
...
@@ -104,21 +167,23 @@ int APPLY_SPECIFIC(ctc_cost_gpu)(PyGpuArrayObject * in_activations,
...
@@ -104,21 +167,23 @@ int APPLY_SPECIFIC(ctc_cost_gpu)(PyGpuArrayObject * in_activations,
}
}
}
}
costs
=
(
npy_float32
*
)
Py
Array
_DATA
(
*
out_costs
);
costs
=
(
npy_float32
*
)
Py
GpuArray_DEV
_DATA
(
*
out_costs
);
npy_float32
*
gradients
=
NULL
;
npy_float32
*
gradients
=
NULL
;
if
(
NULL
!=
out_gradients
)
// if gradient computation is not disabled
if
(
NULL
!=
out_gradients
)
// if gradient computation is not disabled
{
{
if
(
NULL
==
*
out_gradients
||
if
(
NULL
==
*
out_gradients
||
PyArray_NDIM
(
*
out_gradients
)
!=
3
||
Py
Gpu
Array_NDIM
(
*
out_gradients
)
!=
3
||
Py
Array_DIMS
(
*
out_gradients
)[
0
]
!=
Py
Array_DIMS
(
in_activations
)[
0
]
||
Py
GpuArray_DIMS
(
*
out_gradients
)[
0
]
!=
PyGpu
Array_DIMS
(
in_activations
)[
0
]
||
Py
Array_DIMS
(
*
out_gradients
)[
1
]
!=
Py
Array_DIMS
(
in_activations
)[
1
]
||
Py
GpuArray_DIMS
(
*
out_gradients
)[
1
]
!=
PyGpu
Array_DIMS
(
in_activations
)[
1
]
||
Py
Array_DIMS
(
*
out_gradients
)[
2
]
!=
Py
Array_DIMS
(
in_activations
)[
2
]
)
Py
GpuArray_DIMS
(
*
out_gradients
)[
2
]
!=
PyGpu
Array_DIMS
(
in_activations
)[
2
]
)
{
{
Py_XDECREF
(
*
out_gradients
);
Py_XDECREF
(
*
out_gradients
);
*
out_gradients
=
pygpu_zeros
(
3
,
PyArray_DIMS
(
in_activations
),
GA_FLOAT
,
0
);
const
size_t
*
activation_dims
=
PyGpuArray_DIMS
(
in_activations
);
*
out_gradients
=
pygpu_zeros
(
3
,
activation_dims
,
GA_FLOAT
,
GA_C_ORDER
,
ctx
,
Py_None
);
if
(
NULL
==
*
out_gradients
)
if
(
NULL
==
*
out_gradients
)
{
{
...
@@ -130,7 +195,32 @@ int APPLY_SPECIFIC(ctc_cost_gpu)(PyGpuArrayObject * in_activations,
...
@@ -130,7 +195,32 @@ int APPLY_SPECIFIC(ctc_cost_gpu)(PyGpuArrayObject * in_activations,
}
}
}
}
gradients
=
(
npy_float32
*
)
PyArray_DATA
(
*
out_gradients
);
gradients
=
(
npy_float32
*
)
PyGpuArray_DEV_DATA
(
*
out_gradients
);
}
size_t
gpu_workspace_size
;
int
ctc_error
=
0
;
ctc_error
=
ctc_check_result
(
get_workspace_size
(
context
->
label_lengths
,
context
->
input_lengths
,
alphabet_size
,
minibatch_size
,
context
->
options
,
&
gpu_workspace_size
),
"Failed to obtain CTC workspace size!"
);
if
(
ctc_error
)
// Exception is set by ctc_check_result, return error here
{
// Destroy previous CTC context before returning exception
ctc_context_destroy
(
context
);
return
1
;
}
if
(
cudaSuccess
!=
cudaMalloc
(
&
(
context
->
workspace
),
gpu_workspace_size
)
)
{
ctc_context_destroy
(
context
);
PyErr_Format
(
PyExc_MemoryError
,
"Failed to allocate memory for CTC workspace!"
);
return
1
;
}
}
ctc_context_destroy
(
context
);
ctc_context_destroy
(
context
);
...
@@ -139,8 +229,8 @@ int APPLY_SPECIFIC(ctc_cost_gpu)(PyGpuArrayObject * in_activations,
...
@@ -139,8 +229,8 @@ int APPLY_SPECIFIC(ctc_cost_gpu)(PyGpuArrayObject * in_activations,
}
}
int
APPLY_SPECIFIC
(
ctc_cost_gpu_no_grad
)(
PyGpuArrayObject
*
in_activations
,
int
APPLY_SPECIFIC
(
ctc_cost_gpu_no_grad
)(
PyGpuArrayObject
*
in_activations
,
Py
GpuArrayObject
*
in_labels
,
Py
ArrayObject
*
in_labels
,
Py
GpuArrayObject
*
in_input_lengths
,
Py
ArrayObject
*
in_input_lengths
,
PyGpuArrayObject
**
out_costs
,
PyGpuArrayObject
**
out_costs
,
PyGpuContextObject
*
ctx
)
PyGpuContextObject
*
ctx
)
{
{
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论