Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
4b94a811
提交
4b94a811
authored
8月 30, 2016
作者:
Arnaud Bergeron
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Create the cudnn handle on context initialization and pass it around.
上级
3f536fb7
显示空白字符变更
内嵌
并排
正在显示
13 个修改的文件
包含
125 行增加
和
89 行删除
+125
-89
__init__.py
theano/gpuarray/__init__.py
+12
-7
dnn.py
theano/gpuarray/dnn.py
+63
-7
dnn_base.c
theano/gpuarray/dnn_base.c
+0
-38
dnn_batchnorm.c
theano/gpuarray/dnn_batchnorm.c
+4
-2
dnn_batchnorm_grad.c
theano/gpuarray/dnn_batchnorm_grad.c
+4
-2
dnn_batchnorm_inf.c
theano/gpuarray/dnn_batchnorm_inf.c
+4
-2
dnn_fwd.c
theano/gpuarray/dnn_fwd.c
+9
-8
dnn_gi.c
theano/gpuarray/dnn_gi.c
+8
-7
dnn_gw.c
theano/gpuarray/dnn_gw.c
+8
-7
dnn_pool.c
theano/gpuarray/dnn_pool.c
+4
-3
dnn_pool_grad.c
theano/gpuarray/dnn_pool_grad.c
+3
-2
dnn_softmax.c
theano/gpuarray/dnn_softmax.c
+3
-2
dnn_softmax_grad.c
theano/gpuarray/dnn_softmax_grad.c
+3
-2
没有找到文件。
theano/gpuarray/__init__.py
浏览文件 @
4b94a811
...
@@ -26,7 +26,7 @@ except ImportError:
...
@@ -26,7 +26,7 @@ except ImportError:
# This is for documentation not to depend on the availability of pygpu
# This is for documentation not to depend on the availability of pygpu
from
.type
import
(
GpuArrayType
,
GpuArrayVariable
,
GpuArrayConstant
,
from
.type
import
(
GpuArrayType
,
GpuArrayVariable
,
GpuArrayConstant
,
GpuArraySharedVariable
,
gpuarray_shared_constructor
,
GpuArraySharedVariable
,
gpuarray_shared_constructor
,
reg_context
,
get_context
,
ContextNotDefined
)
reg_context
,
get_context
,
ContextNotDefined
,
_get_props
)
from
.basic_ops
import
as_gpuarray_variable
from
.basic_ops
import
as_gpuarray_variable
from
.
import
fft
,
dnn
,
opt
,
nerv
,
extra_ops
,
multinomial
from
.
import
fft
,
dnn
,
opt
,
nerv
,
extra_ops
,
multinomial
...
@@ -89,17 +89,22 @@ def init_dev(dev, name=None):
...
@@ -89,17 +89,22 @@ def init_dev(dev, name=None):
(
name
,
dev
,
context
.
devname
),
(
name
,
dev
,
context
.
devname
),
file
=
sys
.
stderr
)
file
=
sys
.
stderr
)
pygpu_activated
=
True
pygpu_activated
=
True
ctx_props
=
_get_props
(
name
)
ctx_props
[
'dev'
]
=
dev
if
dev
.
startswith
(
'cuda'
):
if
dev
.
startswith
(
'cuda'
):
if
'cudnn_version'
not
in
ctx_props
:
try
:
try
:
cudnn_version
=
dnn
.
version
()
ctx_props
[
'cudnn_version'
]
=
dnn
.
version
()
# 5200 should not print warning with cudnn 5.1 final.
# 5200 should not print warning with cudnn 5.1 final.
if
cudnn_version
>=
5200
:
if
ctx_props
[
'cudnn_version'
]
>=
5200
:
warnings
.
warn
(
"Your cuDNN version is more recent than Theano."
warnings
.
warn
(
"Your cuDNN version is more recent than "
" If you see problems, try updating Theano or"
"Theano. If you encounter problems, try "
" downgrading cuDNN to version 5.1."
)
"updating Theano or downgrading cuDNN to "
"version 5.1."
)
if
config
.
print_active_device
:
if
config
.
print_active_device
:
print
(
"Using cuDNN version
%
d on context
%
s"
%
print
(
"Using cuDNN version
%
d on context
%
s"
%
(
cudnn_version
,
name
),
file
=
sys
.
stderr
)
(
ctx_props
[
'cudnn_version'
],
name
),
file
=
sys
.
stderr
)
ctx_props
[
'cudnn_handle'
]
=
dnn
.
_make_handle
(
context
)
except
Exception
:
except
Exception
:
pass
pass
...
...
theano/gpuarray/dnn.py
浏览文件 @
4b94a811
...
@@ -26,7 +26,8 @@ from theano.tensor.nnet.abstract_conv import (AbstractConv2d,
...
@@ -26,7 +26,8 @@ from theano.tensor.nnet.abstract_conv import (AbstractConv2d,
from
theano.tensor.signal.pool
import
(
from
theano.tensor.signal.pool
import
(
Pool
,
MaxPoolGrad
,
AveragePoolGrad
)
Pool
,
MaxPoolGrad
,
AveragePoolGrad
)
from
.
import
pygpu
from
.
import
pygpu
from
.type
import
get_context
,
gpu_context_type
,
list_contexts
from
.type
import
(
get_context
,
gpu_context_type
,
list_contexts
,
get_prop
,
set_prop
)
from
.basic_ops
import
(
as_gpuarray_variable
,
infer_context_name
,
from
.basic_ops
import
(
as_gpuarray_variable
,
infer_context_name
,
gpu_contiguous
,
gpu_alloc_empty
,
gpu_contiguous
,
gpu_alloc_empty
,
empty_like
,
GpuArrayType
)
empty_like
,
GpuArrayType
)
...
@@ -42,6 +43,44 @@ from .opt_util import alpha_merge, output_merge, inplace_allocempty, pad_dims, u
...
@@ -42,6 +43,44 @@ from .opt_util import alpha_merge, output_merge, inplace_allocempty, pad_dims, u
from
theano.configdefaults
import
SUPPORTED_DNN_CONV_ALGO_BWD_FILTER
from
theano.configdefaults
import
SUPPORTED_DNN_CONV_ALGO_BWD_FILTER
try
:
from
pygpu
import
gpuarray
except
ImportError
:
pass
def _dnn_lib():
    """Return the ctypes handle to the cuDNN shared library, loading it once.

    The library is located with ``ctypes.util.find_library('cudnn')``.  On
    win32, when that lookup fails, the versioned DLL names are tried in
    order.  After loading, the prototypes of ``cudnnCreate`` and
    ``cudnnDestroy`` are declared so that handles are passed as pointers.

    The loaded library is cached on the function attribute
    ``_dnn_lib.handle``; later calls return the cached object.

    Raises
    ------
    RuntimeError
        If no cuDNN library can be found.
    """
    if _dnn_lib.handle is None:
        import ctypes.util
        import sys

        lib_name = ctypes.util.find_library('cudnn')
        if lib_name is None and sys.platform == 'win32':
            # Update these names when new versions of cudnn are supported.
            for dll_name in ('cudnn64_5.dll', 'cudnn64_4.dll'):
                lib_name = ctypes.util.find_library(dll_name)
                if lib_name is not None:
                    break
        if lib_name is None:
            raise RuntimeError('Could not find cudnn library')

        cudnn = ctypes.cdll.LoadLibrary(lib_name)
        # cudnnCreate receives a pointer to the handle it fills in;
        # cudnnDestroy receives the handle itself.  Both return a status code.
        cudnn.cudnnCreate.argtypes = [ctypes.POINTER(ctypes.c_void_p)]
        cudnn.cudnnCreate.restype = ctypes.c_int
        cudnn.cudnnDestroy.argtypes = [ctypes.c_void_p]
        cudnn.cudnnDestroy.restype = ctypes.c_int
        # Publish the library only once it is fully configured, so that a
        # failure above never leaves a half-initialised object in the cache.
        _dnn_lib.handle = cudnn
    return _dnn_lib.handle


_dnn_lib.handle = None
def _make_handle(ctx):
    """Create a new cuDNN handle while ``ctx`` is entered.

    Parameters
    ----------
    ctx
        Object used as a context manager (``with ctx:``) around the call to
        ``cudnnCreate``.

    Returns
    -------
    ctypes.c_void_p
        The handle filled in by ``cudnnCreate``.

    Raises
    ------
    RuntimeError
        If ``cudnnCreate`` returns a non-zero status.  Errors raised by
        ``_dnn_lib()`` while locating the library propagate unchanged.
    """
    # Imported here so this function does not depend on `ctypes` being bound
    # at module level.
    import ctypes

    cudnn = _dnn_lib()
    handle = ctypes.c_void_p()
    with ctx:
        err = cudnn.cudnnCreate(ctypes.byref(handle))
    if err != 0:
        # Report the raw status code so the failure can be diagnosed.
        raise RuntimeError("error creating cudnn handle (status %d)" % err)
    return handle
def raise_no_cudnn(msg="cuDNN is required for convolution and pooling"):
    """Always raise ``RuntimeError`` carrying ``msg``."""
    error = RuntimeError(msg)
    raise error
...
@@ -144,6 +183,12 @@ def dnn_available(context_name):
...
@@ -144,6 +183,12 @@ def dnn_available(context_name):
dnn_available
.
msg
=
None
dnn_available
.
msg
=
None
# Type object describing the C-level `cudnnHandle_t`; DnnBase uses it as its
# `params_type`.  The second positional argument names `cudnnDestroy`,
# presumably the function used to free the value -- TODO confirm against
# CDataType's signature.  Header and library locations come from
# `config.dnn`.
handle_type = CDataType('cudnnHandle_t', 'cudnnDestroy',
                        headers=['cudnn.h'],
                        header_dirs=[config.dnn.include_path],
                        libraries=['cudnn'],
                        lib_dirs=[config.dnn.library_path])
class
DnnBase
(
COp
):
class
DnnBase
(
COp
):
...
@@ -154,10 +199,20 @@ class DnnBase(COp):
...
@@ -154,10 +199,20 @@ class DnnBase(COp):
# dnn does not know about broadcasting, so we do not need to assert
# dnn does not know about broadcasting, so we do not need to assert
# the input broadcasting pattern.
# the input broadcasting pattern.
check_broadcast
=
False
check_broadcast
=
False
params_type
=
gpu_context_type
params_type
=
handle_type
def dnn_context(self, node):
    """Return the ``context_name`` of the type of ``node``'s first output."""
    first_output = node.outputs[0]
    return first_output.type.context_name
def get_params(self, node):
    """Return the cuDNN handle parameter for the context of ``node``.

    The wrapped value is stored as the ``'cudnn_handle_param'`` property of
    the context and reused on later calls.  When that property is missing
    (``get_prop`` signals this with ``KeyError``), it is built from the
    ``.value`` of the ``'cudnn_handle'`` property via
    ``handle_type.make_value`` and stored before being returned.
    """
    try:
        wrapped = get_prop(self.dnn_context(node), 'cudnn_handle_param')
    except KeyError:
        # Not wrapped yet for this context: fall through and build it.
        pass
    else:
        return wrapped

    raw_pointer = get_prop(self.dnn_context(node), 'cudnn_handle').value
    wrapped = handle_type.make_value(raw_pointer)
    set_prop(self.dnn_context(node), 'cudnn_handle_param', wrapped)
    return wrapped
def
__init__
(
self
,
files
=
None
,
c_func
=
None
):
def
__init__
(
self
,
files
=
None
,
c_func
=
None
):
if
files
is
None
:
if
files
is
None
:
...
@@ -165,9 +220,10 @@ class DnnBase(COp):
...
@@ -165,9 +220,10 @@ class DnnBase(COp):
COp
.
__init__
(
self
,
[
"dnn_base.c"
]
+
files
,
c_func
)
COp
.
__init__
(
self
,
[
"dnn_base.c"
]
+
files
,
c_func
)
def c_headers(self):
    """Return the header names to include, in order.

    The gpuarray / numpy compatibility headers come first and the cuDNN
    related headers last.
    """
    base_headers = [
        'gpuarray/types.h',
        'gpuarray/array.h',
        'gpuarray/kernel.h',
        'gpuarray/util.h',
        'gpuarray/ext_cuda.h',
        'gpuarray_api.h',
        'numpy_compat.h',
    ]
    cudnn_headers = ['cudnn.h', 'cudnn_helper.h', 'gpuarray_helper.h']
    return base_headers + cudnn_headers
def
c_header_dirs
(
self
):
def
c_header_dirs
(
self
):
return
[
os
.
path
.
dirname
(
__file__
),
pygpu
.
get_include
(),
return
[
os
.
path
.
dirname
(
__file__
),
pygpu
.
get_include
(),
...
@@ -183,7 +239,7 @@ class DnnBase(COp):
...
@@ -183,7 +239,7 @@ class DnnBase(COp):
return
[
'-Wl,-rpath,'
+
config
.
dnn
.
library_path
]
return
[
'-Wl,-rpath,'
+
config
.
dnn
.
library_path
]
def c_code_cache_version(self):
    """Return the cache version tuple for the generated C code.

    The tuple holds the parent class's cache version, the result of
    ``version()`` and a trailing revision number (currently ``1``).
    """
    parent_version = super(DnnBase, self).c_code_cache_version()
    return (parent_version, version(), 1)
class
DnnVersion
(
Op
):
class
DnnVersion
(
Op
):
...
...
theano/gpuarray/dnn_base.c
浏览文件 @
4b94a811
...
@@ -149,41 +149,3 @@ static int c_make_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t *desc) {
...
@@ -149,41 +149,3 @@ static int c_make_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t *desc) {
#section init_code
#section init_code
setup_ext_cuda
();
setup_ext_cuda
();
#section support_code_struct

// State for the cuDNN handle: the context object whose reference is taken in
// init_code_struct, and the handle created there and destroyed in
// cleanup_code_struct.
PyGpuContextObject *ctx;
cudnnHandle_t APPLY_SPECIFIC(_handle);

#section init_code_struct

{
  // We need to keep a reference here to have it available in the destructor.
  ctx = PARAMS;
  Py_INCREF(ctx);

  // Everything between cuda_enter and cuda_exit runs with the context
  // entered; each failure path calls cuda_exit before FAIL.
  cuda_enter(PARAMS->ctx);
  cudnnStatus_t err;
  APPLY_SPECIFIC(_handle) = NULL;
  if ((err = cudnnCreate(&APPLY_SPECIFIC(_handle))) != CUDNN_STATUS_SUCCESS) {
    PyErr_Format(PyExc_RuntimeError, "could not create cuDNN handle: %s",
                 cudnnGetErrorString(err));
    cuda_exit(PARAMS->ctx);
    FAIL;
  }
  // Attach the handle to the stream returned by cuda_get_stream for this
  // context.
  if ((err = cudnnSetStream(APPLY_SPECIFIC(_handle),
                            cuda_get_stream(PARAMS->ctx))) != CUDNN_STATUS_SUCCESS) {
    PyErr_Format(PyExc_RuntimeError, "Could not set cudnn stream: %s",
                 cudnnGetErrorString(err));
    cuda_exit(PARAMS->ctx);
    FAIL;
  }
  cuda_exit(PARAMS->ctx);
}

#section cleanup_code_struct

// Destroy the handle with the context entered, then drop the reference taken
// in init_code_struct.
cuda_enter(ctx->ctx);
cudnnDestroy(APPLY_SPECIFIC(_handle));
cuda_exit(ctx->ctx);
Py_DECREF((PyObject *)ctx);
theano/gpuarray/dnn_batchnorm.c
浏览文件 @
4b94a811
...
@@ -3,7 +3,9 @@
...
@@ -3,7 +3,9 @@
int
dnn_batchnorm_op
(
PyGpuArrayObject
*
inp
,
PyGpuArrayObject
*
scale
,
int
dnn_batchnorm_op
(
PyGpuArrayObject
*
inp
,
PyGpuArrayObject
*
scale
,
PyGpuArrayObject
*
bias
,
npy_float64
epsilon
,
PyGpuArrayObject
*
bias
,
npy_float64
epsilon
,
PyGpuArrayObject
**
outp
,
PyGpuArrayObject
**
x_mean
,
PyGpuArrayObject
**
outp
,
PyGpuArrayObject
**
x_mean
,
PyGpuArrayObject
**
x_invstd
,
PyGpuContextObject
*
c
)
{
PyGpuArrayObject
**
x_invstd
,
cudnnHandle_t
_handle
)
{
PyGpuContextObject
*
c
=
inp
->
context
;
if
(
c_set_tensorNd
(
inp
,
bn_input
)
!=
0
)
if
(
c_set_tensorNd
(
inp
,
bn_input
)
!=
0
)
return
1
;
return
1
;
if
(
c_set_tensorNd
(
scale
,
bn_params
)
!=
0
)
if
(
c_set_tensorNd
(
scale
,
bn_params
)
!=
0
)
...
@@ -37,7 +39,7 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
...
@@ -37,7 +39,7 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
beta
=
(
void
*
)
&
fbeta
;
beta
=
(
void
*
)
&
fbeta
;
}
}
cudnnStatus_t
err
=
cudnnBatchNormalizationForwardTraining
(
cudnnStatus_t
err
=
cudnnBatchNormalizationForwardTraining
(
APPLY_SPECIFIC
(
_handle
)
,
_handle
,
MODE
,
MODE
,
alpha
,
alpha
,
beta
,
beta
,
...
...
theano/gpuarray/dnn_batchnorm_grad.c
浏览文件 @
4b94a811
...
@@ -24,7 +24,9 @@ int dnn_batchnorm_grad(PyGpuArrayObject *inp, PyGpuArrayObject *doutp,
...
@@ -24,7 +24,9 @@ int dnn_batchnorm_grad(PyGpuArrayObject *inp, PyGpuArrayObject *doutp,
PyGpuArrayObject
*
scale
,
PyGpuArrayObject
*
x_mean
,
PyGpuArrayObject
*
scale
,
PyGpuArrayObject
*
x_mean
,
PyGpuArrayObject
*
x_invstd
,
npy_float64
epsilon
,
PyGpuArrayObject
*
x_invstd
,
npy_float64
epsilon
,
PyGpuArrayObject
**
dinp
,
PyGpuArrayObject
**
dscale
,
PyGpuArrayObject
**
dinp
,
PyGpuArrayObject
**
dscale
,
PyGpuArrayObject
**
dbias
,
PyGpuContextObject
*
c
)
{
PyGpuArrayObject
**
dbias
,
cudnnHandle_t
_handle
)
{
PyGpuContextObject
*
c
=
inp
->
context
;
if
(
c_set_tensorNd
(
inp
,
bn_input
)
!=
0
)
if
(
c_set_tensorNd
(
inp
,
bn_input
)
!=
0
)
return
1
;
return
1
;
if
(
c_set_tensorNd
(
doutp
,
bn_doutput
)
!=
0
)
if
(
c_set_tensorNd
(
doutp
,
bn_doutput
)
!=
0
)
...
@@ -66,7 +68,7 @@ int dnn_batchnorm_grad(PyGpuArrayObject *inp, PyGpuArrayObject *doutp,
...
@@ -66,7 +68,7 @@ int dnn_batchnorm_grad(PyGpuArrayObject *inp, PyGpuArrayObject *doutp,
betaParam
=
(
void
*
)
&
fbeta
;
betaParam
=
(
void
*
)
&
fbeta
;
}
}
cudnnStatus_t
err
=
cudnnBatchNormalizationBackward
(
cudnnStatus_t
err
=
cudnnBatchNormalizationBackward
(
APPLY_SPECIFIC
(
_handle
)
,
_handle
,
MODE
,
MODE
,
alphaData
,
alphaData
,
betaData
,
betaData
,
...
...
theano/gpuarray/dnn_batchnorm_inf.c
浏览文件 @
4b94a811
...
@@ -3,7 +3,9 @@
...
@@ -3,7 +3,9 @@
int
dnn_batchnorm_op
(
PyGpuArrayObject
*
inp
,
PyGpuArrayObject
*
scale
,
int
dnn_batchnorm_op
(
PyGpuArrayObject
*
inp
,
PyGpuArrayObject
*
scale
,
PyGpuArrayObject
*
bias
,
PyGpuArrayObject
*
est_mean
,
PyGpuArrayObject
*
bias
,
PyGpuArrayObject
*
est_mean
,
PyGpuArrayObject
*
est_var
,
npy_float64
epsilon
,
PyGpuArrayObject
*
est_var
,
npy_float64
epsilon
,
PyGpuArrayObject
**
outp
,
PyGpuContextObject
*
c
)
{
PyGpuArrayObject
**
outp
,
cudnnHandle_t
_handle
)
{
PyGpuContextObject
*
c
=
inp
->
context
;
if
(
c_set_tensorNd
(
inp
,
bn_input
)
!=
0
)
if
(
c_set_tensorNd
(
inp
,
bn_input
)
!=
0
)
return
1
;
return
1
;
if
(
c_set_tensorNd
(
scale
,
bn_params
)
!=
0
)
if
(
c_set_tensorNd
(
scale
,
bn_params
)
!=
0
)
...
@@ -33,7 +35,7 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
...
@@ -33,7 +35,7 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
beta
=
(
void
*
)
&
fbeta
;
beta
=
(
void
*
)
&
fbeta
;
}
}
cudnnStatus_t
err
=
cudnnBatchNormalizationForwardInference
(
cudnnStatus_t
err
=
cudnnBatchNormalizationForwardInference
(
APPLY_SPECIFIC
(
_handle
)
,
_handle
,
MODE
,
MODE
,
alpha
,
alpha
,
beta
,
beta
,
...
...
theano/gpuarray/dnn_fwd.c
浏览文件 @
4b94a811
...
@@ -26,11 +26,12 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
...
@@ -26,11 +26,12 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
cudnnConvolutionDescriptor_t
desc
,
cudnnConvolutionDescriptor_t
desc
,
double
alpha
,
double
beta
,
double
alpha
,
double
beta
,
PyGpuArrayObject
**
output
,
PyGpuArrayObject
**
output
,
PyGpuContextObject
*
c
)
{
cudnnHandle_t
_handle
)
{
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
PyGpuContextObject
*
c
=
input
->
context
;
float
af
=
alpha
,
bf
=
beta
;
void
*
alpha_p
;
void
*
alpha_p
;
void
*
beta_p
;
void
*
beta_p
;
float
af
=
alpha
,
bf
=
beta
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
if
(
PyGpuArray_DIMS
(
input
)[
1
]
!=
PyGpuArray_DIMS
(
kerns
)[
1
])
{
if
(
PyGpuArray_DIMS
(
input
)[
1
]
!=
PyGpuArray_DIMS
(
kerns
)[
1
])
{
PyErr_SetString
(
PyExc_ValueError
,
PyErr_SetString
(
PyExc_ValueError
,
...
@@ -92,7 +93,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
...
@@ -92,7 +93,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
int
count
;
int
count
;
cudnnConvolutionFwdAlgoPerf_t
choice
;
cudnnConvolutionFwdAlgoPerf_t
choice
;
err
=
cudnnFindConvolutionForwardAlgorithm
(
err
=
cudnnFindConvolutionForwardAlgorithm
(
APPLY_SPECIFIC
(
_handle
)
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
kerns
),
_handle
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
kerns
),
desc
,
APPLY_SPECIFIC
(
output
),
1
,
&
count
,
&
choice
);
desc
,
APPLY_SPECIFIC
(
output
),
1
,
&
count
,
&
choice
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
...
@@ -115,7 +116,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
...
@@ -115,7 +116,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
}
}
err
=
cudnnGetConvolutionForwardAlgorithm
(
err
=
cudnnGetConvolutionForwardAlgorithm
(
APPLY_SPECIFIC
(
_handle
)
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
kerns
),
_handle
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
kerns
),
desc
,
APPLY_SPECIFIC
(
output
),
desc
,
APPLY_SPECIFIC
(
output
),
CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT
,
free
,
&
algo
);
CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT
,
free
,
&
algo
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
...
@@ -198,7 +199,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
...
@@ -198,7 +199,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
{
{
size_t
worksize
;
size_t
worksize
;
gpudata
*
workspace
;
gpudata
*
workspace
;
err
=
cudnnGetConvolutionForwardWorkspaceSize
(
APPLY_SPECIFIC
(
_handle
)
,
err
=
cudnnGetConvolutionForwardWorkspaceSize
(
_handle
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
kerns
),
APPLY_SPECIFIC
(
kerns
),
desc
,
desc
,
...
@@ -211,7 +212,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
...
@@ -211,7 +212,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
// TODO: Print a warning
// TODO: Print a warning
algo
=
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM
;
algo
=
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM
;
err
=
cudnnGetConvolutionForwardWorkspaceSize
(
APPLY_SPECIFIC
(
_handle
)
,
err
=
cudnnGetConvolutionForwardWorkspaceSize
(
_handle
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
kerns
),
APPLY_SPECIFIC
(
kerns
),
desc
,
desc
,
...
@@ -248,7 +249,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
...
@@ -248,7 +249,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
cuda_wait
((
*
output
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
cuda_wait
((
*
output
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
err
=
cudnnConvolutionForward
(
err
=
cudnnConvolutionForward
(
APPLY_SPECIFIC
(
_handle
)
,
_handle
,
alpha_p
,
alpha_p
,
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
input
),
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
input
),
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
kerns
),
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
kerns
),
...
...
theano/gpuarray/dnn_gi.c
浏览文件 @
4b94a811
...
@@ -25,11 +25,12 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
...
@@ -25,11 +25,12 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
PyGpuArrayObject
*
im
,
PyGpuArrayObject
*
im
,
cudnnConvolutionDescriptor_t
desc
,
cudnnConvolutionDescriptor_t
desc
,
double
alpha
,
double
beta
,
PyGpuArrayObject
**
input
,
double
alpha
,
double
beta
,
PyGpuArrayObject
**
input
,
PyGpuContextObject
*
c
)
{
cudnnHandle_t
_handle
)
{
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
PyGpuContextObject
*
c
=
kerns
->
context
;
float
af
=
alpha
,
bf
=
beta
;
void
*
alpha_p
;
void
*
alpha_p
;
void
*
beta_p
;
void
*
beta_p
;
float
af
=
alpha
,
bf
=
beta
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
if
(
PyGpuArray_DIMS
(
im
)[
1
]
!=
PyGpuArray_DIMS
(
kerns
)[
1
])
{
if
(
PyGpuArray_DIMS
(
im
)[
1
]
!=
PyGpuArray_DIMS
(
kerns
)[
1
])
{
PyErr_SetString
(
PyExc_ValueError
,
"images and kernel must have the same "
PyErr_SetString
(
PyExc_ValueError
,
"images and kernel must have the same "
...
@@ -93,7 +94,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
...
@@ -93,7 +94,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
cudnnConvolutionBwdDataAlgoPerf_t
choice
;
cudnnConvolutionBwdDataAlgoPerf_t
choice
;
err
=
cudnnFindConvolutionBackwardDataAlgorithm
(
err
=
cudnnFindConvolutionBackwardDataAlgorithm
(
APPLY_SPECIFIC
(
_handle
)
,
APPLY_SPECIFIC
(
kerns
),
APPLY_SPECIFIC
(
output
),
desc
,
_handle
,
APPLY_SPECIFIC
(
kerns
),
APPLY_SPECIFIC
(
output
),
desc
,
APPLY_SPECIFIC
(
input
),
1
,
&
count
,
&
choice
);
APPLY_SPECIFIC
(
input
),
1
,
&
count
,
&
choice
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
...
@@ -116,7 +117,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
...
@@ -116,7 +117,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
}
}
err
=
cudnnGetConvolutionBackwardDataAlgorithm
(
err
=
cudnnGetConvolutionBackwardDataAlgorithm
(
APPLY_SPECIFIC
(
_handle
)
,
APPLY_SPECIFIC
(
kerns
),
APPLY_SPECIFIC
(
output
),
_handle
,
APPLY_SPECIFIC
(
kerns
),
APPLY_SPECIFIC
(
output
),
desc
,
APPLY_SPECIFIC
(
input
),
desc
,
APPLY_SPECIFIC
(
input
),
CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT
,
free
,
&
algo
);
CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT
,
free
,
&
algo
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
...
@@ -193,7 +194,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
...
@@ -193,7 +194,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
gpudata
*
workspace
;
gpudata
*
workspace
;
err
=
cudnnGetConvolutionBackwardDataWorkspaceSize
(
err
=
cudnnGetConvolutionBackwardDataWorkspaceSize
(
APPLY_SPECIFIC
(
_handle
)
,
APPLY_SPECIFIC
(
kerns
),
APPLY_SPECIFIC
(
output
),
desc
,
_handle
,
APPLY_SPECIFIC
(
kerns
),
APPLY_SPECIFIC
(
output
),
desc
,
APPLY_SPECIFIC
(
input
),
algo
,
&
worksize
);
APPLY_SPECIFIC
(
input
),
algo
,
&
worksize
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
...
@@ -218,7 +219,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
...
@@ -218,7 +219,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
cuda_wait
((
*
input
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
cuda_wait
((
*
input
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
err
=
cudnnConvolutionBackwardData
(
err
=
cudnnConvolutionBackwardData
(
APPLY_SPECIFIC
(
_handle
)
,
_handle
,
alpha_p
,
alpha_p
,
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
kerns
),
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
kerns
),
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
output
),
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
output
),
...
...
theano/gpuarray/dnn_gw.c
浏览文件 @
4b94a811
...
@@ -25,11 +25,12 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
...
@@ -25,11 +25,12 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
PyGpuArrayObject
*
km
,
PyGpuArrayObject
*
km
,
cudnnConvolutionDescriptor_t
desc
,
cudnnConvolutionDescriptor_t
desc
,
double
alpha
,
double
beta
,
PyGpuArrayObject
**
kerns
,
double
alpha
,
double
beta
,
PyGpuArrayObject
**
kerns
,
PyGpuContextObject
*
c
)
{
cudnnHandle_t
_handle
)
{
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
PyGpuContextObject
*
c
=
input
->
context
;
float
af
=
alpha
,
bf
=
beta
;
void
*
alpha_p
;
void
*
alpha_p
;
void
*
beta_p
;
void
*
beta_p
;
float
af
=
alpha
,
bf
=
beta
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
if
(
PyGpuArray_DIMS
(
input
)[
1
]
!=
PyGpuArray_DIMS
(
km
)[
1
])
{
if
(
PyGpuArray_DIMS
(
input
)[
1
]
!=
PyGpuArray_DIMS
(
km
)[
1
])
{
PyErr_SetString
(
PyExc_ValueError
,
PyErr_SetString
(
PyExc_ValueError
,
...
@@ -93,7 +94,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
...
@@ -93,7 +94,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
cudnnConvolutionBwdFilterAlgoPerf_t
choice
;
cudnnConvolutionBwdFilterAlgoPerf_t
choice
;
err
=
cudnnFindConvolutionBackwardFilterAlgorithm
(
err
=
cudnnFindConvolutionBackwardFilterAlgorithm
(
APPLY_SPECIFIC
(
_handle
)
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
output
),
desc
,
_handle
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
output
),
desc
,
APPLY_SPECIFIC
(
kerns
),
1
,
&
count
,
&
choice
);
APPLY_SPECIFIC
(
kerns
),
1
,
&
count
,
&
choice
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
...
@@ -117,7 +118,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
...
@@ -117,7 +118,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
}
}
err
=
cudnnGetConvolutionBackwardFilterAlgorithm
(
err
=
cudnnGetConvolutionBackwardFilterAlgorithm
(
APPLY_SPECIFIC
(
_handle
)
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
output
),
_handle
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
output
),
desc
,
APPLY_SPECIFIC
(
kerns
),
desc
,
APPLY_SPECIFIC
(
kerns
),
CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT
,
free
,
&
algo
);
CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT
,
free
,
&
algo
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
...
@@ -181,7 +182,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
...
@@ -181,7 +182,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
gpudata
*
workspace
;
gpudata
*
workspace
;
err
=
cudnnGetConvolutionBackwardFilterWorkspaceSize
(
err
=
cudnnGetConvolutionBackwardFilterWorkspaceSize
(
APPLY_SPECIFIC
(
_handle
)
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
output
),
desc
,
_handle
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
output
),
desc
,
APPLY_SPECIFIC
(
kerns
),
algo
,
&
worksize
);
APPLY_SPECIFIC
(
kerns
),
algo
,
&
worksize
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
...
@@ -205,7 +206,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
...
@@ -205,7 +206,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
cuda_wait
((
*
kerns
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
cuda_wait
((
*
kerns
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
err
=
cudnnConvolutionBackwardFilter
(
err
=
cudnnConvolutionBackwardFilter
(
APPLY_SPECIFIC
(
_handle
)
,
_handle
,
alpha_p
,
alpha_p
,
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
input
),
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
input
),
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
output
),
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
output
),
...
...
theano/gpuarray/dnn_pool.c
浏览文件 @
4b94a811
...
@@ -42,9 +42,10 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
...
@@ -42,9 +42,10 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
PyArrayObject
*
stride
,
PyArrayObject
*
stride
,
PyArrayObject
*
pad
,
PyArrayObject
*
pad
,
PyGpuArrayObject
**
out
,
PyGpuArrayObject
**
out
,
PyGpuContextObject
*
c
)
{
cudnnHandle_t
_handle
)
{
cudnnStatus_t
err
;
PyGpuContextObject
*
c
=
img
->
context
;
size_t
dims
[
5
];
size_t
dims
[
5
];
cudnnStatus_t
err
;
if
(
!
GpuArray_IS_C_CONTIGUOUS
(
&
img
->
ga
))
{
if
(
!
GpuArray_IS_C_CONTIGUOUS
(
&
img
->
ga
))
{
PyErr_SetString
(
PyExc_ValueError
,
"Only contiguous inputs are supported."
);
PyErr_SetString
(
PyExc_ValueError
,
"Only contiguous inputs are supported."
);
...
@@ -122,7 +123,7 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
...
@@ -122,7 +123,7 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
cuda_wait
((
*
out
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
cuda_wait
((
*
out
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
err
=
cudnnPoolingForward
(
err
=
cudnnPoolingForward
(
APPLY_SPECIFIC
(
_handle
)
,
APPLY_SPECIFIC
(
pool
),
_handle
,
APPLY_SPECIFIC
(
pool
),
alpha
,
alpha
,
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
img
),
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
img
),
beta
,
beta
,
...
...
theano/gpuarray/dnn_pool_grad.c
浏览文件 @
4b94a811
...
@@ -64,7 +64,8 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp,
...
@@ -64,7 +64,8 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp,
PyArrayObject
*
stride
,
PyArrayObject
*
stride
,
PyArrayObject
*
pad
,
PyArrayObject
*
pad
,
PyGpuArrayObject
**
inp_grad
,
PyGpuArrayObject
**
inp_grad
,
PyGpuContextObject
*
c
)
{
cudnnHandle_t
_handle
)
{
PyGpuContextObject
*
c
=
inp
->
context
;
cudnnStatus_t
err
;
cudnnStatus_t
err
;
if
(
!
GpuArray_IS_C_CONTIGUOUS
(
&
inp
->
ga
))
{
if
(
!
GpuArray_IS_C_CONTIGUOUS
(
&
inp
->
ga
))
{
...
@@ -153,7 +154,7 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp,
...
@@ -153,7 +154,7 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp,
cuda_wait
((
*
inp_grad
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
cuda_wait
((
*
inp_grad
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
err
=
cudnnPoolingBackward
(
err
=
cudnnPoolingBackward
(
APPLY_SPECIFIC
(
_handle
)
,
APPLY_SPECIFIC
(
pool
),
_handle
,
APPLY_SPECIFIC
(
pool
),
alpha
,
alpha
,
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
out
),
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
out
),
APPLY_SPECIFIC
(
output_grad
),
PyGpuArray_DEV_DATA
(
out_grad
),
APPLY_SPECIFIC
(
output_grad
),
PyGpuArray_DEV_DATA
(
out_grad
),
...
...
theano/gpuarray/dnn_softmax.c
浏览文件 @
4b94a811
...
@@ -35,7 +35,8 @@ if (APPLY_SPECIFIC(output) != NULL)
...
@@ -35,7 +35,8 @@ if (APPLY_SPECIFIC(output) != NULL)
int
APPLY_SPECIFIC
(
softmax
)(
PyGpuArrayObject
*
x
,
int
APPLY_SPECIFIC
(
softmax
)(
PyGpuArrayObject
*
x
,
PyGpuArrayObject
**
out
,
PyGpuArrayObject
**
out
,
PyGpuContextObject
*
c
)
{
cudnnHandle_t
_handle
)
{
PyGpuContextObject
*
c
=
x
->
context
;
cudnnStatus_t
err
;
cudnnStatus_t
err
;
if
(
c_set_tensorNd
(
x
,
APPLY_SPECIFIC
(
input
))
!=
0
)
if
(
c_set_tensorNd
(
x
,
APPLY_SPECIFIC
(
input
))
!=
0
)
...
@@ -77,7 +78,7 @@ int APPLY_SPECIFIC(softmax)(PyGpuArrayObject *x,
...
@@ -77,7 +78,7 @@ int APPLY_SPECIFIC(softmax)(PyGpuArrayObject *x,
cuda_wait
((
*
out
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
cuda_wait
((
*
out
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
err
=
cudnnSoftmaxForward
(
err
=
cudnnSoftmaxForward
(
APPLY_SPECIFIC
(
_handle
)
,
_handle
,
SOFTMAX_ALGO
,
SOFTMAX_ALGO
,
SOFTMAX_MODE
,
SOFTMAX_MODE
,
alpha
,
alpha
,
...
...
theano/gpuarray/dnn_softmax_grad.c
浏览文件 @
4b94a811
...
@@ -46,7 +46,8 @@ if (APPLY_SPECIFIC(dx) != NULL)
...
@@ -46,7 +46,8 @@ if (APPLY_SPECIFIC(dx) != NULL)
int
APPLY_SPECIFIC
(
softmax_grad
)(
PyGpuArrayObject
*
dy
,
int
APPLY_SPECIFIC
(
softmax_grad
)(
PyGpuArrayObject
*
dy
,
PyGpuArrayObject
*
sm
,
PyGpuArrayObject
*
sm
,
PyGpuArrayObject
**
dx
,
PyGpuArrayObject
**
dx
,
PyGpuContextObject
*
c
)
{
cudnnHandle_t
_handle
)
{
PyGpuContextObject
*
c
=
dy
->
context
;
cudnnStatus_t
err
;
cudnnStatus_t
err
;
if
(
c_set_tensorNd
(
dy
,
APPLY_SPECIFIC
(
dy
))
!=
0
)
if
(
c_set_tensorNd
(
dy
,
APPLY_SPECIFIC
(
dy
))
!=
0
)
...
@@ -91,7 +92,7 @@ int APPLY_SPECIFIC(softmax_grad)(PyGpuArrayObject *dy,
...
@@ -91,7 +92,7 @@ int APPLY_SPECIFIC(softmax_grad)(PyGpuArrayObject *dy,
cuda_wait
((
*
dx
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
cuda_wait
((
*
dx
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
err
=
cudnnSoftmaxBackward
(
err
=
cudnnSoftmaxBackward
(
APPLY_SPECIFIC
(
_handle
)
,
_handle
,
SOFTMAX_ALGO
,
SOFTMAX_ALGO
,
SOFTMAX_MODE
,
SOFTMAX_MODE
,
alpha
,
alpha
,
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论