Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
7dd20e1d
提交
7dd20e1d
authored
12月 02, 2013
作者:
vdumoulin
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1 from nouiz/vdumoulin-new_backend
Vdumoulin new backend
上级
024347f9
ab250493
隐藏空白字符变更
内嵌
并排
正在显示
4 个修改的文件
包含
114 行增加
和
98 行删除
+114
-98
nnet.py
theano/sandbox/gpuarray/nnet.py
+88
-78
opt.py
theano/sandbox/gpuarray/opt.py
+3
-3
test_nnet.py
theano/sandbox/gpuarray/tests/test_nnet.py
+16
-16
type.py
theano/sandbox/gpuarray/type.py
+7
-1
没有找到文件。
theano/sandbox/gpuarray/nnet.py
浏览文件 @
7dd20e1d
import
numpy
from
theano
import
Op
,
Apply
from
theano.compat.six
import
StringIO
from
theano.sandbox.cuda.nvcc_compiler
import
NVCC_compiler
from
theano.sandbox.cuda.kernel_codegen
import
(
nvcc_kernel
,
inline_softmax
,
inline_softmax_fixed_shared
)
try
:
import
pygpu
from
pygpu
import
gpuarray
,
elemwise
...
...
@@ -13,6 +13,7 @@ except ImportError:
pass
from
theano.sandbox.gpuarray.basic_ops
import
as_gpuarray_variable
from
theano.sandbox.gpuarray.type
import
GpuArrayType
class
GpuCrossentropySoftmaxArgmax1HotWithBias
(
Op
):
...
...
@@ -36,7 +37,8 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
x
=
as_gpuarray_variable
(
x
)
b
=
as_gpuarray_variable
(
b
)
y_idx
=
as_gpuarray_variable
(
y_idx
)
nll
=
y_idx
.
type
()
nll
=
GpuArrayType
(
x
.
type
.
dtype
,
y_idx
.
type
.
broadcastable
)()
sm
=
x
.
type
()
am
=
y_idx
.
type
()
return
Apply
(
self
,
[
x
,
b
,
y_idx
],
[
nll
,
sm
,
am
])
...
...
@@ -44,31 +46,31 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
def
c_headers
(
self
):
return
[
'cuda.h'
,
'<compyte/extension.h>'
,
'<compyte/numpy_compat.h>'
]
def
c_support_code_apply
(
self
,
node
):
dtype
0
=
node
.
inputs
[
0
]
.
dtype
dtype
1
=
node
.
inputs
[
1
]
.
dtype
dtype
2
=
node
.
inputs
[
2
]
.
dtype
def
c_support_code_apply
(
self
,
node
,
nodename
):
dtype
_x
=
node
.
inputs
[
0
]
.
dtype
dtype
_b
=
node
.
inputs
[
1
]
.
dtype
dtype
_y_idx
=
node
.
inputs
[
2
]
.
dtype
return
"""
__global__ void k_xent_sm_1hot_bias(int M, int N,
const npy_
%(dtype
0
)
s* x_data, int xs0, int xs1,
const npy_
%(dtype
1
)
s* b, int bs0,
const npy_
%(dtype
2
)
s* y_idx_data, int y_idxs0,
npy_
%(dtype)
s* nll_data, int nlls0,
npy_
%(dtype)
s* sm_data, int sms0, int sms1,
npy_
%(dtype)
s* am_data, int ams0)
__global__ void k_xent_sm_1hot_bias
_
%(nodename)
s
(int M, int N,
const npy_
%(dtype
_x
)
s* x_data, int xs0, int xs1,
const npy_
%(dtype
_b
)
s* b, int bs0,
const npy_
%(dtype
_y_idx
)
s* y_idx_data, int y_idxs0,
npy_
%(dtype
_x
)
s* nll_data, int nlls0,
npy_
%(dtype
_x
)
s* sm_data, int sms0, int sms1,
npy_
%(dtype
_y_idx
)
s* am_data, int ams0)
{
for (int row = blockIdx.x; row < M; row += gridDim.x){
const npy_
%(dtype
0
)
s* x = x_data + xs0 * row;
const
int y_idx = (int)
y_idx_data[row * y_idxs0];
npy_
%(dtype
0
)
s* sm = sm_data + sms0 * row;
const npy_
%(dtype
_x
)
s* x = x_data + xs0 * row;
const
npy_
%(dtype_y_idx)
s y_idx =
y_idx_data[row * y_idxs0];
npy_
%(dtype
_x
)
s* sm = sm_data + sms0 * row;
npy_
%(dtype
0
)
s sum = 0.0;
npy_
%(dtype
_x
)
s sum = 0.0;
int row_max_j = 0;
npy_
%(dtype
0
)
s row_max = x[0] + b[0];
npy_
%(dtype
_x
)
s row_max = x[0] + b[0];
for (int j = 1; j < N; ++j)
{
npy_
%(dtype
0
)
s row_ij = x[j*xs1] + b[j*bs0];
npy_
%(dtype
_x
)
s row_ij = x[j*xs1] + b[j*bs0];
//todo: store to shared memory
row_max_j = (row_ij > row_max) ? j : row_max_j;
row_max = (row_ij > row_max) ? row_ij : row_max;
...
...
@@ -76,12 +78,12 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
//compute the exp
for (int j = 0; j < N; ++j)
{
npy_
%(dtype
0
)
s row_ij = x[j*xs1] + b[j*bs0];
npy_
%(dtype
0
)
s sm_ij = exp(row_ij - row_max);
npy_
%(dtype
_x
)
s row_ij = x[j*xs1] + b[j*bs0];
npy_
%(dtype
_x
)
s sm_ij = exp(row_ij - row_max);
sum += sm_ij;
sm[j * sms1] = sm_ij;
}
npy_
%(dtype
0
)
s sum_inv = 1.0 / sum;
npy_
%(dtype
_x
)
s sum_inv = 1.0 / sum;
for (int j = 0; j < N; ++j)
{
sm[j * sms1] *= sum_inv;
...
...
@@ -103,14 +105,21 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
}
CUdeviceptr (*cuda_get_ptr)(gpudata *g);
"""
"""
%
locals
()
def
c_init_code
(
self
):
return
[
'cuda_get_ptr = (CUdeviceptr (*)(gpudata *g))compyte_get_extension("cuda_get_ptr");'
]
def
c_code
(
self
,
node
,
nodename
,
inp
,
out
,
sub
):
dtype
=
self
.
dtype
typecode
=
pygpu
.
gpuarray
.
dtype_to_typecode
(
dtype
)
typecode_x
=
pygpu
.
gpuarray
.
dtype_to_typecode
(
node
.
inputs
[
0
]
.
dtype
)
typecode_b
=
pygpu
.
gpuarray
.
dtype_to_typecode
(
node
.
inputs
[
1
]
.
dtype
)
typecode_y_idx
=
pygpu
.
gpuarray
.
dtype_to_typecode
(
node
.
inputs
[
2
]
.
dtype
)
itemsize_x
=
numpy
.
dtype
(
node
.
inputs
[
0
]
.
dtype
)
.
itemsize
itemsize_b
=
numpy
.
dtype
(
node
.
inputs
[
1
]
.
dtype
)
.
itemsize
itemsize_y_idx
=
numpy
.
dtype
(
node
.
inputs
[
2
]
.
dtype
)
.
itemsize
itemsize_nll
=
numpy
.
dtype
(
node
.
outputs
[
0
]
.
dtype
)
.
itemsize
itemsize_sm
=
numpy
.
dtype
(
node
.
outputs
[
1
]
.
dtype
)
.
itemsize
itemsize_am
=
numpy
.
dtype
(
node
.
outputs
[
2
]
.
dtype
)
.
itemsize
x
,
b
,
y_idx
=
inp
nll
,
sm
,
am
=
out
classname
=
self
.
__class__
.
__name__
...
...
@@ -151,7 +160,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
{
Py_XDECREF(
%(nll)
s);
%(nll)
s = pygpu_empty(1, PyGpuArray_DIMS(
%(y_idx)
s),
%(typecode)
s,
%(typecode
_x
)
s,
GA_C_ORDER,
pygpu_default_context(), Py_None);
if (!
%(nll)
s) {
...
...
@@ -166,7 +175,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
{
Py_XDECREF(
%(sm)
s);
%(sm)
s = pygpu_empty(2, PyGpuArray_DIMS(
%(x)
s),
%(typecode)
s,
%(typecode
_b
)
s,
GA_C_ORDER,
pygpu_default_context(), Py_None);
if(!
%(sm)
s)
...
...
@@ -183,7 +192,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
{
Py_XDECREF(
%(am)
s);
%(am)
s = pygpu_empty(1, PyGpuArray_DIMS(
%(y_idx)
s),
%(typecode)
s,
%(typecode
_y_idx
)
s,
GA_C_ORDER,
pygpu_default_context(), Py_None);
if(!
%(am)
s)
...
...
@@ -196,37 +205,35 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
}
}
{
int n_blocks = std::min(PyGpuArray_DIMS(
%(x)
s)[0],
256);
int n_blocks = PyGpuArray_DIMS(
%(x)
s)[0] < 256 ? PyGpuArray_DIMS(
%(x)
s)[0] : 256;
//TODO: launch more threads per row and do parallel sum and max reductions
int n_threads = 1;
int n_shared_bytes = 0; //n_threads * sizeof(
%(dtype)
s
);
int n_shared_bytes = 0; //n_threads * sizeof(
dtype
);
k_xent_sm_1hot_bias<<<n_blocks, n_threads, n_shared_bytes>>>(
k_xent_sm_1hot_bias
_
%(nodename)
s
<<<n_blocks, n_threads, n_shared_bytes>>>(
PyGpuArray_DIMS(
%(x)
s)[0],
PyGpuArray_DIMS(
%(x)
s)[1],
(dtype_
%(x)
s*)(((char *)cuda_get_ptr(
%(x)
s->ga.data)) +
%(x)
s->ga.offset)
;
PyGpuArray_STRIDES(
%(x)
s)[0],
PyGpuArray_STRIDES(
%(x)
s)[1],
%(x)
s->ga.offset)
,
PyGpuArray_STRIDES(
%(x)
s)[0]
/
%(itemsize_x)
s
,
PyGpuArray_STRIDES(
%(x)
s)[1]
/
%(itemsize_x)
s
,
(dtype_
%(b)
s*)(((char *)cuda_get_ptr(
%(b)
s->ga.data)) +
%(b)
s->ga.offset)
;
PyGpuArray_STRIDES(
%(b)
s)[0],
%(b)
s->ga.offset)
,
PyGpuArray_STRIDES(
%(b)
s)[0]
/
%(itemsize_b)
s
,
(dtype_
%(y_idx)
s*)(((char *)cuda_get_ptr(
%(y_idx)
s->ga.data)) +
%(y_idx)
s->ga.offset)
;
PyGpuArray_STRIDES(
%(y_idx)
s)[0],
%(y_idx)
s->ga.offset)
,
PyGpuArray_STRIDES(
%(y_idx)
s)[0]
/
%(itemsize_y_idx)
s
,
(dtype_
%(nll)
s*)(((char *)cuda_get_ptr(
%(nll)
s->ga.data)) +
%(nll)
s->ga.offset)
;
PyGpuArray_STRIDES(
%(nll)
s)[0],
%(nll)
s->ga.offset)
,
PyGpuArray_STRIDES(
%(nll)
s)[0]
/
%(itemsize_nll)
s
,
(dtype_
%(sm)
s*)(((char *)cuda_get_ptr(
%(sm)
s->ga.data)) +
%(sm)
s->ga.offset)
;
PyGpuArray_STRIDES(
%(sm)
s)[0],
PyGpuArray_STRIDES(
%(sm)
s)[1],
%(sm)
s->ga.offset)
,
PyGpuArray_STRIDES(
%(sm)
s)[0]
/
%(itemsize_sm)
s
,
PyGpuArray_STRIDES(
%(sm)
s)[1]
/
%(itemsize_sm)
s
,
(dtype_
%(am)
s*)(((char *)cuda_get_ptr(
%(am)
s->ga.data)) +
%(am)
s->ga.offset);
PyGpuArray_STRIDES(
%(am)
s)[0]);
CNDA_THREAD_SYNC;
%(am)
s->ga.offset),
PyGpuArray_STRIDES(
%(am)
s)[0] /
%(itemsize_am)
s);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err)
{
...
...
@@ -245,7 +252,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(Op):
def
c_code_cache_version
(
self
):
#return ()
return
(
4
,)
return
(
5
,)
def
c_compiler
(
self
):
return
NVCC_compiler
...
...
@@ -273,11 +280,11 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
def
__str__
(
self
):
return
self
.
__class__
.
__name__
def
make_node
(
self
,
d
y
,
sm
,
y_idx
):
d
y
=
as_gpuarray_variable
(
dy
)
def
make_node
(
self
,
d
nll
,
sm
,
y_idx
):
d
nll
=
as_gpuarray_variable
(
dnll
)
sm
=
as_gpuarray_variable
(
sm
)
y_idx
=
as_gpuarray_variable
(
y_idx
)
return
Apply
(
self
,
[
d
y
,
sm
,
y_idx
],
[
sm
.
type
()])
return
Apply
(
self
,
[
d
nll
,
sm
,
y_idx
],
[
sm
.
type
()])
def
c_code_cache_version
(
self
):
#return ()
...
...
@@ -290,7 +297,11 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
return
NVCC_compiler
def
c_code
(
self
,
node
,
nodename
,
inp
,
out
,
sub
):
typecode
=
pygpu
.
gpuarray
.
dtype_to_typecode
(
node
.
outputs
[
0
]
.
dtype
)
typecode_dx
=
pygpu
.
gpuarray
.
dtype_to_typecode
(
node
.
outputs
[
0
]
.
dtype
)
itemsize_dnll
=
numpy
.
dtype
(
node
.
inputs
[
0
]
.
dtype
)
.
itemsize
itemsize_sm
=
numpy
.
dtype
(
node
.
inputs
[
1
]
.
dtype
)
.
itemsize
itemsize_y_idx
=
numpy
.
dtype
(
node
.
inputs
[
2
]
.
dtype
)
.
itemsize
itemsize_dx
=
numpy
.
dtype
(
node
.
outputs
[
0
]
.
dtype
)
.
itemsize
dnll
,
sm
,
y_idx
=
inp
dx
,
=
out
fail
=
sub
[
'fail'
]
...
...
@@ -326,7 +337,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
{
Py_XDECREF(
%(dx)
s);
%(dx)
s = pygpu_empty(2, PyGpuArray_DIMS(
%(sm)
s),
%(typecode)
s,
%(typecode
_dx
)
s,
GA_C_ORDER,
pygpu_default_context(), Py_None);
if (!
%(dx)
s) {
...
...
@@ -334,9 +345,8 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
}
}
{
int n_blocks = std::min(PyGpuArray_DIMS(
%(dx)
s)[0],
256);
int n_threads = std::min(PyGpuArray_DIMS(
%(dx)
s)[1],256);
int n_blocks = PyGpuArray_DIMS(
%(dx)
s)[0] < 256 ? PyGpuArray_DIMS(
%(dx)
s)[0] : 256;
int n_threads = PyGpuArray_DIMS(
%(dx)
s)[1] < 256 ? PyGpuArray_DIMS(
%(dx)
s)[1] : 256;
kCrossEntropySoftmax1HotWithBiasDx_
%(nodename)
s
<<<n_blocks, n_threads>>>(
...
...
@@ -344,24 +354,23 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
PyGpuArray_DIMS(
%(dx)
s)[1],
(dtype_
%(dnll)
s*)(((char *)cuda_get_ptr(
%(dnll)
s->ga.data)) +
%(dnll)
s->ga.offset)
;
PyGpuArray_STRIDES(
%(dnll)
s)[0],
%(dnll)
s->ga.offset)
,
PyGpuArray_STRIDES(
%(dnll)
s)[0]
/
%(itemsize_dnll)
s
,
(dtype_
%(sm)
s*)(((char *)cuda_get_ptr(
%(sm)
s->ga.data)) +
%(sm)
s->ga.offset)
;
PyGpuArray_STRIDES(
%(sm)
s)[0],
PyGpuArray_STRIDES(
%(sm)
s)[1],
%(sm)
s->ga.offset)
,
PyGpuArray_STRIDES(
%(sm)
s)[0]
/
%(itemsize_sm)
s
,
PyGpuArray_STRIDES(
%(sm)
s)[1]
/
%(itemsize_sm)
s
,
(dtype_
%(y_idx)
s*)(((char *)cuda_get_ptr(
%(y_idx)
s->ga.data)) +
%(y_idx)
s->ga.offset)
;
PyGpuArray_STRIDES(
%(y_idx)
s)[0],
%(y_idx)
s->ga.offset)
,
PyGpuArray_STRIDES(
%(y_idx)
s)[0]
/
%(itemsize_y_idx)
s
,
(dtype_
%(dx)
s*)(((char *)cuda_get_ptr(
%(dx)
s->ga.data)) +
%(dx)
s->ga.offset)
;
PyGpuArray_STRIDES(
%(dx)
s)[0],
PyGpuArray_STRIDES(
%(dx)
s)[1]
%(dx)
s->ga.offset)
,
PyGpuArray_STRIDES(
%(dx)
s)[0]
/
%(itemsize_dx)
s
,
PyGpuArray_STRIDES(
%(dx)
s)[1]
/
%(itemsize_dx)
s
);
CNDA_THREAD_SYNC;
cudaError_t err = cudaGetLastError();
if( cudaSuccess != err)
{
...
...
@@ -378,21 +387,22 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
"""
%
locals
()
def
c_support_code_apply
(
self
,
node
,
nodename
):
dtype0
=
node
.
inputs
[
0
]
.
dtype
dtype1
=
node
.
inputs
[
1
]
.
dtype
dtype2
=
node
.
inputs
[
2
]
.
dtype
dtype_dnll
=
node
.
inputs
[
0
]
.
dtype
dtype_sm
=
node
.
inputs
[
1
]
.
dtype
dtype_y_idx
=
node
.
inputs
[
2
]
.
dtype
dtype_dx
=
node
.
outputs
[
0
]
.
dtype
return
"""
__global__ void kCrossEntropySoftmax1HotWithBiasDx_
%(nodename)
s(
int N, int K,
const npy_
%(dtype
0
)
s* dnll, const int dnll_s0,
const npy_
%(dtype
1
)
s* sm, const int sm_s0, const int sm_s1,
const npy_
%(dtype
2
)
s* y_idx, const int y_idx_s0,
npy_
%(dtype
1
)
s* dx, const int dx_s0, const int dx_s1)
const npy_
%(dtype
_dnll
)
s* dnll, const int dnll_s0,
const npy_
%(dtype
_sm
)
s* sm, const int sm_s0, const int sm_s1,
const npy_
%(dtype
_y_idx
)
s* y_idx, const int y_idx_s0,
npy_
%(dtype
_dx
)
s* dx, const int dx_s0, const int dx_s1)
{
for (int i = blockIdx.x; i < N; i += gridDim.x)
{
npy_
%(dtype
0
)
s dnll_i = dnll[i * dnll_s0];
int y_i = (int)
y_idx[i * y_idx_s0];
npy_
%(dtype
_dnll
)
s dnll_i = dnll[i * dnll_s0];
npy_
%(dtype_y_idx)
s y_i =
y_idx[i * y_idx_s0];
for (int j = threadIdx.x; j < K; j += blockDim.x)
{
...
...
theano/sandbox/gpuarray/opt.py
浏览文件 @
7dd20e1d
...
...
@@ -60,7 +60,6 @@ def op_lifter(OP):
def
local_opt
(
node
):
if
type
(
node
.
op
)
in
OP
:
# This does not support nodes that have more than one output.
assert
len
(
node
.
outputs
)
==
1
# either one of our inputs is on the gpu or
# all of our client are on the gpu
if
(
any
([
i
.
owner
and
i
.
owner
.
op
==
host_from_gpu
...
...
@@ -71,7 +70,9 @@ def op_lifter(OP):
# This is needed as sometimes new_op inherit from OP.
if
new_op
and
new_op
!=
node
.
op
:
if
isinstance
(
new_op
,
theano
.
Op
):
return
[
host_from_gpu
(
new_op
(
*
node
.
inputs
))]
return
[
host_from_gpu
(
o
)
for
o
in
new_op
(
*
node
.
inputs
,
return_list
=
True
)]
elif
isinstance
(
new_op
,
(
tuple
,
list
)):
return
[
host_from_gpu
(
o
)
for
o
in
new_op
]
else
:
# suppose it is a variable on the GPU
return
[
host_from_gpu
(
new_op
)]
return
False
...
...
@@ -281,4 +282,3 @@ def local_gpua_crossentropysoftmaxargmax1hotwithbias(node):
@op_lifter
([
tensor
.
nnet
.
CrossentropySoftmax1HotWithBiasDx
])
def
local_gpua_crossentropysoftmax1hotwithbiasdx
(
node
):
return
GpuCrossentropySoftmax1HotWithBiasDx
()
theano/sandbox/gpuarray/tests/test_nnet.py
浏览文件 @
7dd20e1d
...
...
@@ -6,7 +6,7 @@ from theano.gof.python25 import any
import
theano.tensor
as
T
import
theano.tests.unittest_tools
as
utt
import
theano.sandbox.
gpuarray
from
theano.sandbox
import
gpuarray
if
theano
.
sandbox
.
gpuarray
.
pygpu
is
None
:
raise
SkipTest
(
"pygpu not installed"
)
...
...
@@ -20,21 +20,21 @@ if cuda_ndarray.cuda_available and not theano.sandbox.gpuarray.pygpu_activated:
default_to_move_computation_to_gpu
=
False
,
move_shared_float32_to_gpu
=
False
,
enable_cuda
=
False
)
theano
.
sandbox
.
gpuarray
.
init_dev
(
'cuda'
)
gpuarray
.
init_dev
(
'cuda'
)
if
not
theano
.
sandbox
.
gpuarray
.
pygpu_activated
:
if
not
gpuarray
.
pygpu_activated
:
raise
SkipTest
(
"pygpu disabled"
)
from
theano.sandbox.gpuarray.nnet
import
(
GpuCrossentropySoftmaxArgmax1HotWithBias
,
GpuCrossentropySoftmax1HotWithBiasDx
)
from
theano.sandbox.gpuarray.nnet
import
(
GpuCrossentropySoftmaxArgmax1HotWithBias
,
GpuCrossentropySoftmax1HotWithBiasDx
)
if
theano
.
config
.
mode
==
'FAST_COMPILE'
:
mode_with_gpu
=
theano
.
compile
.
mode
.
get_mode
(
'FAST_RUN'
)
.
including
(
'gpu'
)
mode_without_gpu
=
theano
.
compile
.
mode
.
get_mode
(
'FAST_RUN'
)
.
excluding
(
'gpu'
)
mode_with_gpu
=
theano
.
compile
.
mode
.
get_mode
(
'FAST_RUN'
)
.
including
(
'gpuarray'
)
.
excluding
(
'gpu'
)
mode_without_gpu
=
theano
.
compile
.
mode
.
get_mode
(
'FAST_RUN'
)
.
excluding
(
'gpuarray'
)
else
:
mode_with_gpu
=
theano
.
compile
.
mode
.
get_default_mode
()
.
including
(
'gpu'
)
mode_without_gpu
=
theano
.
compile
.
mode
.
get_default_mode
()
.
excluding
(
'gpu'
)
mode_with_gpu
=
theano
.
compile
.
mode
.
get_default_mode
()
.
including
(
'gpu
array'
)
.
excluding
(
'gpu
'
)
mode_without_gpu
=
theano
.
compile
.
mode
.
get_default_mode
()
.
excluding
(
'gpu
array
'
)
def
test_GpuCrossentropySoftmaxArgmax1HotWithBias
():
...
...
@@ -87,7 +87,7 @@ def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
mode
=
mode_without_gpu
)
classify_gpu
=
theano
.
function
(
inputs
=
[
y
,
b
,
dot_result
],
outputs
=
[
loss
,
y_pred
,
dW
],
mode
=
mode_with_gpu
)
mode
=
mode_with_gpu
)
#theano.printing.debugprint(classify)
#theano.printing.debugprint(classify_gpu)
...
...
@@ -95,7 +95,7 @@ def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
T
.
nnet
.
CrossentropySoftmaxArgmax1HotWithBias
)
for
node
in
classify
.
maker
.
fgraph
.
toposort
()])
assert
any
([
isinstance
(
node
.
op
,
theano
.
sandbox
.
gpuarray
.
nnet
.
GpuCrossentropySoftmaxArgmax1HotWithBias
)
GpuCrossentropySoftmaxArgmax1HotWithBias
)
for
node
in
classify_gpu
.
maker
.
fgraph
.
toposort
()])
out
=
classify
(
yy
,
b_values
,
dot_value
)
...
...
@@ -104,7 +104,7 @@ def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
assert
len
(
out
)
==
len
(
gout
)
==
3
assert
numpy
.
allclose
(
out
[
0
],
gout
[
0
])
assert
numpy
.
allclose
(
out
[
2
],
gout
[
2
],
atol
=
3e-6
),
numpy
.
absolute
(
gout
-
out
)
.
max
()
gout
[
2
]
-
out
[
2
]
)
.
max
()
assert
numpy
.
allclose
(
out
[
1
],
gout
[
1
]),
[(
id
,
out
[
1
][
id
],
gout
[
1
][
id
],
val
)
for
id
,
val
in
enumerate
(
out
[
1
]
-
gout
[
1
])
...
...
@@ -150,7 +150,7 @@ def test_GpuCrossentropySoftmax1HotWithBiasDx():
assert
any
([
isinstance
(
node
.
op
,
T
.
nnet
.
CrossentropySoftmax1HotWithBiasDx
)
for
node
in
cpu_f
.
maker
.
fgraph
.
toposort
()])
assert
any
([
isinstance
(
node
.
op
,
theano
.
sandbox
.
gpuarray
.
nnet
.
GpuCrossentropySoftmax1HotWithBiasDx
)
GpuCrossentropySoftmax1HotWithBiasDx
)
for
node
in
gpu_f
.
maker
.
fgraph
.
toposort
()])
cpu_out
=
cpu_f
(
softmax_output_value
)
...
...
@@ -164,7 +164,7 @@ def test_GpuCrossentropySoftmax1HotWithBiasDx():
max_i
=
scaled_err
.
argmax
()
print
'max err index:'
,
max_i
,
max_i
/
batch_size
,
print
max_i
%
batch_size
,
max_i
/
n_out
,
max_i
&
n_out
print
max_i
%
batch_size
,
max_i
/
n_out
,
max_i
&
n_out
print
'At that index:'
print
'err:'
,
scaled_err
.
flatten
()[
max_i
]
print
'absolute error:'
,
abs_err
.
flatten
()[
max_i
]
...
...
@@ -176,4 +176,4 @@ def test_GpuCrossentropySoftmax1HotWithBiasDx():
print
'y_idx_value:'
,
y_idx_value
[
max_i
/
n_out
]
assert
False
,
"numpy.allclose(cpu_out, gpu_out, rtol=
%
s, atol=
%
s)"
%
(
rtol
,
atol
)
rtol
,
atol
)
theano/sandbox/gpuarray/type.py
浏览文件 @
7dd20e1d
...
...
@@ -138,7 +138,13 @@ class GpuArrayType(Type):
return
numpy
.
dtype
(
self
.
dtype
)
.
itemsize
def
c_declare
(
self
,
name
,
sub
):
return
"PyGpuArrayObject *
%
s;"
%
(
name
,)
dtype
=
theano
.
tensor
.
TensorType
(
dtype
=
self
.
dtype
,
broadcastable
=
self
.
broadcastable
)
.
dtype_specs
()[
1
]
return
"""
PyGpuArrayObject *
%(name)
s;
typedef
%(dtype)
s dtype_
%(name)
s;
"""
%
locals
()
def
c_init
(
self
,
name
,
sub
):
return
"
%
s = NULL;"
%
(
name
,)
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论