Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
032c5b14
提交
032c5b14
authored
2月 24, 2011
作者:
David Warde-Farley
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Whitespace fixes.
上级
9d85fda7
隐藏空白字符变更
内嵌
并排
正在显示
7 个修改的文件
包含
79 行增加
和
79 行删除
+79
-79
nnet.py
theano/sandbox/cuda/nnet.py
+17
-17
fourier.py
theano/sandbox/fourier.py
+3
-3
minimal.py
theano/sandbox/minimal.py
+2
-2
multinomial.py
theano/sandbox/multinomial.py
+15
-15
neighbourhoods.py
theano/sandbox/neighbourhoods.py
+10
-10
neighbours.py
theano/sandbox/neighbours.py
+31
-31
truedot.py
theano/sparse/sandbox/truedot.py
+1
-1
没有找到文件。
theano/sandbox/cuda/nnet.py
浏览文件 @
032c5b14
...
@@ -150,15 +150,15 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias (Op):
...
@@ -150,15 +150,15 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias (Op):
k_xent_sm_1hot_bias<<<n_blocks, n_threads, n_shared_bytes>>>(
k_xent_sm_1hot_bias<<<n_blocks, n_threads, n_shared_bytes>>>(
CudaNdarray_HOST_DIMS(
%(x)
s)[0],
CudaNdarray_HOST_DIMS(
%(x)
s)[0],
CudaNdarray_HOST_DIMS(
%(x)
s)[1],
CudaNdarray_HOST_DIMS(
%(x)
s)[1],
CudaNdarray_DEV_DATA(
%(x)
s), CudaNdarray_HOST_STRIDES(
%(x)
s)[0], CudaNdarray_HOST_STRIDES(
%(x)
s)[1],
CudaNdarray_DEV_DATA(
%(x)
s), CudaNdarray_HOST_STRIDES(
%(x)
s)[0], CudaNdarray_HOST_STRIDES(
%(x)
s)[1],
CudaNdarray_DEV_DATA(
%(b)
s), CudaNdarray_HOST_STRIDES(
%(b)
s)[0],
CudaNdarray_DEV_DATA(
%(b)
s), CudaNdarray_HOST_STRIDES(
%(b)
s)[0],
CudaNdarray_DEV_DATA(
%(y_idx)
s), CudaNdarray_HOST_STRIDES(
%(y_idx)
s)[0],
CudaNdarray_DEV_DATA(
%(y_idx)
s), CudaNdarray_HOST_STRIDES(
%(y_idx)
s)[0],
CudaNdarray_DEV_DATA(
%(nll)
s), CudaNdarray_HOST_STRIDES(
%(nll)
s)[0],
CudaNdarray_DEV_DATA(
%(nll)
s), CudaNdarray_HOST_STRIDES(
%(nll)
s)[0],
CudaNdarray_DEV_DATA(
%(sm)
s), CudaNdarray_HOST_STRIDES(
%(sm)
s)[0], CudaNdarray_HOST_STRIDES(
%(sm)
s)[1],
CudaNdarray_DEV_DATA(
%(sm)
s), CudaNdarray_HOST_STRIDES(
%(sm)
s)[0], CudaNdarray_HOST_STRIDES(
%(sm)
s)[1],
CudaNdarray_DEV_DATA(
%(am)
s), CudaNdarray_HOST_STRIDES(
%(am)
s)[0]);
CudaNdarray_DEV_DATA(
%(am)
s), CudaNdarray_HOST_STRIDES(
%(am)
s)[0]);
CNDA_THREAD_SYNC;
CNDA_THREAD_SYNC;
cudaError_t err = cudaGetLastError();
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err)
if (cudaSuccess != err)
{
{
PyErr_Format(PyExc_RuntimeError, "Cuda error:
%(classname)
s
%(nodename)
s:
%%
s.
\\
n", cudaGetErrorString(err));
PyErr_Format(PyExc_RuntimeError, "Cuda error:
%(classname)
s
%(nodename)
s:
%%
s.
\\
n", cudaGetErrorString(err));
// no need to decref output vars the cleanup code should pick them up.
// no need to decref output vars the cleanup code should pick them up.
...
@@ -233,7 +233,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx (Op):
...
@@ -233,7 +233,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx (Op):
std::min(CudaNdarray_HOST_DIMS(
%(dx)
s)[1],256)
std::min(CudaNdarray_HOST_DIMS(
%(dx)
s)[1],256)
>>>(
>>>(
CudaNdarray_HOST_DIMS(
%(dx)
s)[0],
CudaNdarray_HOST_DIMS(
%(dx)
s)[0],
CudaNdarray_HOST_DIMS(
%(dx)
s)[1],
CudaNdarray_HOST_DIMS(
%(dx)
s)[1],
CudaNdarray_DEV_DATA(
%(dnll)
s),
CudaNdarray_DEV_DATA(
%(dnll)
s),
CudaNdarray_HOST_STRIDES(
%(dnll)
s)[0],
CudaNdarray_HOST_STRIDES(
%(dnll)
s)[0],
...
@@ -249,11 +249,11 @@ class GpuCrossentropySoftmax1HotWithBiasDx (Op):
...
@@ -249,11 +249,11 @@ class GpuCrossentropySoftmax1HotWithBiasDx (Op):
);
);
CNDA_THREAD_SYNC;
CNDA_THREAD_SYNC;
cudaError_t err = cudaGetLastError();
cudaError_t err = cudaGetLastError();
if( cudaSuccess != err)
if( cudaSuccess != err)
{
{
PyErr_Format(PyExc_RuntimeError, "Cuda error:
%%
s:
%%
s.
\\
n", "kCrossEntropySoftmax1HotWithBiasDx_
%(nodename)
s", cudaGetErrorString(err));
PyErr_Format(PyExc_RuntimeError, "Cuda error:
%%
s:
%%
s.
\\
n", "kCrossEntropySoftmax1HotWithBiasDx_
%(nodename)
s", cudaGetErrorString(err));
%(fail)
s;
%(fail)
s;
}
}
}
}
assert(
%(dx)
s);
assert(
%(dx)
s);
"""
%
locals
()
"""
%
locals
()
...
@@ -337,7 +337,7 @@ class GpuSoftmax (Op):
...
@@ -337,7 +337,7 @@ class GpuSoftmax (Op):
CudaNdarray_HOST_DIMS(
%(x)
s)[1] * 2 * sizeof(float)
CudaNdarray_HOST_DIMS(
%(x)
s)[1] * 2 * sizeof(float)
>>>(
>>>(
CudaNdarray_HOST_DIMS(
%(x)
s)[0],
CudaNdarray_HOST_DIMS(
%(x)
s)[0],
CudaNdarray_HOST_DIMS(
%(x)
s)[1],
CudaNdarray_HOST_DIMS(
%(x)
s)[1],
CudaNdarray_DEV_DATA(
%(x)
s),
CudaNdarray_DEV_DATA(
%(x)
s),
CudaNdarray_HOST_STRIDES(
%(x)
s)[0],
CudaNdarray_HOST_STRIDES(
%(x)
s)[0],
...
@@ -347,18 +347,18 @@ class GpuSoftmax (Op):
...
@@ -347,18 +347,18 @@ class GpuSoftmax (Op):
);
);
CNDA_THREAD_SYNC;
CNDA_THREAD_SYNC;
cudaError_t err = cudaGetLastError();
cudaError_t err = cudaGetLastError();
if( cudaSuccess != err)
if( cudaSuccess != err)
{
{
PyErr_Format(PyExc_RuntimeError, "Cuda error:
%%
s:
%%
s.
\\
n", "kSoftmax_
%(nodename)
s", cudaGetErrorString(err));
PyErr_Format(PyExc_RuntimeError, "Cuda error:
%%
s:
%%
s.
\\
n", "kSoftmax_
%(nodename)
s", cudaGetErrorString(err));
%(fail)
s;
%(fail)
s;
}
}
}
}
assert(
%(z)
s);
assert(
%(z)
s);
"""
%
locals
()
"""
%
locals
()
def
c_support_code_apply
(
self
,
node
,
nodename
):
def
c_support_code_apply
(
self
,
node
,
nodename
):
return
nvcc_kernel
(
"kSoftmax_
%
s"
%
nodename
,
return
nvcc_kernel
(
"kSoftmax_
%
s"
%
nodename
,
params
=
[
'int M'
,
'int N'
,
params
=
[
'int M'
,
'int N'
,
'const float * x'
,
'const int sx0'
,
'const int sx1'
,
'const float * x'
,
'const int sx0'
,
'const int sx1'
,
'float * sm'
],
'float * sm'
],
body
=
[
body
=
[
...
@@ -436,7 +436,7 @@ class GpuSoftmaxWithBias (Op):
...
@@ -436,7 +436,7 @@ class GpuSoftmaxWithBias (Op):
CudaNdarray_HOST_DIMS(
%(x)
s)[1] * 2 * sizeof(float)
CudaNdarray_HOST_DIMS(
%(x)
s)[1] * 2 * sizeof(float)
>>>(
>>>(
CudaNdarray_HOST_DIMS(
%(x)
s)[0],
CudaNdarray_HOST_DIMS(
%(x)
s)[0],
CudaNdarray_HOST_DIMS(
%(x)
s)[1],
CudaNdarray_HOST_DIMS(
%(x)
s)[1],
CudaNdarray_DEV_DATA(
%(x)
s),
CudaNdarray_DEV_DATA(
%(x)
s),
CudaNdarray_HOST_STRIDES(
%(x)
s)[0],
CudaNdarray_HOST_STRIDES(
%(x)
s)[0],
...
@@ -449,18 +449,18 @@ class GpuSoftmaxWithBias (Op):
...
@@ -449,18 +449,18 @@ class GpuSoftmaxWithBias (Op):
);
);
CNDA_THREAD_SYNC;
CNDA_THREAD_SYNC;
cudaError_t err = cudaGetLastError();
cudaError_t err = cudaGetLastError();
if( cudaSuccess != err)
if( cudaSuccess != err)
{
{
PyErr_Format(PyExc_RuntimeError, "Cuda error:
%%
s:
%%
s.
\\
n", "kSoftmax_
%(nodename)
s", cudaGetErrorString(err));
PyErr_Format(PyExc_RuntimeError, "Cuda error:
%%
s:
%%
s.
\\
n", "kSoftmax_
%(nodename)
s", cudaGetErrorString(err));
%(fail)
s;
%(fail)
s;
}
}
}
}
assert(
%(z)
s);
assert(
%(z)
s);
"""
%
locals
()
"""
%
locals
()
def
c_support_code_apply
(
self
,
node
,
nodename
):
def
c_support_code_apply
(
self
,
node
,
nodename
):
return
nvcc_kernel
(
"kSoftmaxWithBias_
%
s"
%
nodename
,
return
nvcc_kernel
(
"kSoftmaxWithBias_
%
s"
%
nodename
,
params
=
[
'int M'
,
'int N'
,
params
=
[
'int M'
,
'int N'
,
'const float * x'
,
'const int sx0'
,
'const int sx1'
,
'const float * x'
,
'const int sx0'
,
'const int sx1'
,
'const float * b'
,
'const int sb0'
,
'const float * b'
,
'const int sb0'
,
'float * sm'
],
'float * sm'
],
...
...
theano/sandbox/fourier.py
浏览文件 @
032c5b14
...
@@ -18,7 +18,7 @@ grad_todo = GradTodo()
...
@@ -18,7 +18,7 @@ grad_todo = GradTodo()
class
FFT
(
Op
):
class
FFT
(
Op
):
"""Fast Fourier Transform
"""Fast Fourier Transform
.. TODO:
.. TODO:
The current implementation just works for matrix inputs, and permits taking a 1D FFT over
The current implementation just works for matrix inputs, and permits taking a 1D FFT over
either rows or columns. Add support for N-D FFTs as provided by either numpy or FFTW
either rows or columns. Add support for N-D FFTs as provided by either numpy or FFTW
...
@@ -29,7 +29,7 @@ class FFT(Op):
...
@@ -29,7 +29,7 @@ class FFT(Op):
.. TODO:
.. TODO:
unit tests.
unit tests.
"""
"""
default_output
=
0
default_output
=
0
...
@@ -61,7 +61,7 @@ class FFT(Op):
...
@@ -61,7 +61,7 @@ class FFT(Op):
raise
TypeError
(
'Argument to HalfFFT must not be complex'
,
frames
)
raise
TypeError
(
'Argument to HalfFFT must not be complex'
,
frames
)
spectrogram
=
tensor
.
zmatrix
()
spectrogram
=
tensor
.
zmatrix
()
buf
=
generic
()
buf
=
generic
()
# The `buf` output is present for future work
# The `buf` output is present for future work
# when we call FFTW directly and re-use the 'plan' that FFTW creates.
# when we call FFTW directly and re-use the 'plan' that FFTW creates.
# In that case, buf would store a CObject encapsulating the plan.
# In that case, buf would store a CObject encapsulating the plan.
rval
=
Apply
(
self
,
[
_frames
,
_n
,
_axis
],
[
spectrogram
,
buf
])
rval
=
Apply
(
self
,
[
_frames
,
_n
,
_axis
],
[
spectrogram
,
buf
])
...
...
theano/sandbox/minimal.py
浏览文件 @
032c5b14
...
@@ -25,13 +25,13 @@ class Minimal(gof.Op):
...
@@ -25,13 +25,13 @@ class Minimal(gof.Op):
return
hash
(
type
(
self
))
return
hash
(
type
(
self
))
def
make_node
(
self
,
*
args
):
def
make_node
(
self
,
*
args
):
# HERE `args` must be THEANO VARIABLES
# HERE `args` must be THEANO VARIABLES
return
gof
.
Apply
(
op
=
self
,
inputs
=
args
,
outputs
=
[
tensor
.
lscalar
()])
return
gof
.
Apply
(
op
=
self
,
inputs
=
args
,
outputs
=
[
tensor
.
lscalar
()])
def
perform
(
self
,
node
,
inputs
,
(
output
,
)):
def
perform
(
self
,
node
,
inputs
,
(
output
,
)):
# HERE `inputs` are PYTHON OBJECTS
# HERE `inputs` are PYTHON OBJECTS
# do what you want here,
# do what you want here,
# but do not modify any of the arguments [inplace].
# but do not modify any of the arguments [inplace].
print
"perform got
%
i arguments"
%
len
(
inputs
)
print
"perform got
%
i arguments"
%
len
(
inputs
)
...
...
theano/sandbox/multinomial.py
浏览文件 @
032c5b14
...
@@ -59,12 +59,12 @@ class Multinomial(Op):
...
@@ -59,12 +59,12 @@ class Multinomial(Op):
npy_intp dims[2];
npy_intp dims[2];
dims[0] = (
%(pvals)
s->dimensions)[0];
dims[0] = (
%(pvals)
s->dimensions)[0];
dims[1] = (
%(pvals)
s->dimensions)[1];
dims[1] = (
%(pvals)
s->dimensions)[1];
%(z)
s = (PyArrayObject*) PyArray_ZEROS(2,
%(z)
s = (PyArrayObject*) PyArray_ZEROS(2,
dims,
dims,
type_num_
%(pvals)
s,
type_num_
%(pvals)
s,
0);
0);
if (!
%(z)
s)
if (!
%(z)
s)
{
{
PyErr_SetString(PyExc_MemoryError, "failed to alloc z output");
PyErr_SetString(PyExc_MemoryError, "failed to alloc z output");
...
@@ -96,7 +96,7 @@ class Multinomial(Op):
...
@@ -96,7 +96,7 @@ class Multinomial(Op):
}
}
}
}
}
}
} // END NESTED SCOPE
} // END NESTED SCOPE
"""
%
locals
()
"""
%
locals
()
multinomial
=
Multinomial
()
multinomial
=
Multinomial
()
...
@@ -128,24 +128,24 @@ class GpuMultinomial(Multinomial):
...
@@ -128,24 +128,24 @@ class GpuMultinomial(Multinomial):
float * global_unis,
float * global_unis,
float * global_outs
float * global_outs
)
)
{
{
int n = blockDim.x*blockIdx.x + threadIdx.x;
int n = blockDim.x*blockIdx.x + threadIdx.x;
if (n < nb_multi)
if (n < nb_multi)
{
{
float cummul = 0.;
float cummul = 0.;
bool done = false;
bool done = false;
for (int m = 0; m < nb_outcomes; ++m)
for (int m = 0; m < nb_outcomes; ++m)
{
{
cummul += global_pvals[n * pvals_col_strides + m * pvals_row_strides];
cummul += global_pvals[n * pvals_col_strides + m * pvals_row_strides];
float current_out = 0.;
float current_out = 0.;
if (!done && global_unis[n] < cummul)
if (!done && global_unis[n] < cummul)
{
{
current_out = 1.;
current_out = 1.;
done = true;
done = true;
}
}
global_outs[n + m * nb_multi] = current_out;
global_outs[n + m * nb_multi] = current_out;
}
}
}
}
...
@@ -157,7 +157,7 @@ class GpuMultinomial(Multinomial):
...
@@ -157,7 +157,7 @@ class GpuMultinomial(Multinomial):
def
c_code
(
self
,
node
,
name
,
(
pvals
,
unis
),
(
z
,),
sub
):
def
c_code
(
self
,
node
,
name
,
(
pvals
,
unis
),
(
z
,),
sub
):
fail
=
sub
[
'fail'
]
fail
=
sub
[
'fail'
]
return
"""
return
"""
if (
%(pvals)
s->nd != 2)
if (
%(pvals)
s->nd != 2)
{
{
PyErr_Format(PyExc_TypeError, "pvals wrong rank");
PyErr_Format(PyExc_TypeError, "pvals wrong rank");
...
@@ -168,7 +168,7 @@ class GpuMultinomial(Multinomial):
...
@@ -168,7 +168,7 @@ class GpuMultinomial(Multinomial):
PyErr_Format(PyExc_TypeError, "unis wrong rank");
PyErr_Format(PyExc_TypeError, "unis wrong rank");
%(fail)
s;
%(fail)
s;
}
}
if (CudaNdarray_HOST_DIMS(
%(unis)
s)[0] != CudaNdarray_HOST_DIMS(
%(pvals)
s)[1])
if (CudaNdarray_HOST_DIMS(
%(unis)
s)[0] != CudaNdarray_HOST_DIMS(
%(pvals)
s)[1])
{
{
PyErr_Format(PyExc_ValueError, "unis.shape[0] != pvals.shape[1]");
PyErr_Format(PyExc_ValueError, "unis.shape[0] != pvals.shape[1]");
...
@@ -201,7 +201,7 @@ class GpuMultinomial(Multinomial):
...
@@ -201,7 +201,7 @@ class GpuMultinomial(Multinomial):
{ // NESTED SCOPE
{ // NESTED SCOPE
int nb_outcomes = CudaNdarray_HOST_DIMS(
%(z)
s)[0];
int nb_outcomes = CudaNdarray_HOST_DIMS(
%(z)
s)[0];
int nb_multi = CudaNdarray_HOST_DIMS(
%(z)
s)[1];
int nb_multi = CudaNdarray_HOST_DIMS(
%(z)
s)[1];
//TODO : change this for a beautiful constant
//TODO : change this for a beautiful constant
int max_nb_blocks = 2<<15 - 1;
int max_nb_blocks = 2<<15 - 1;
int nb_blocks = max_nb_blocks + 1;
int nb_blocks = max_nb_blocks + 1;
...
@@ -212,7 +212,7 @@ class GpuMultinomial(Multinomial):
...
@@ -212,7 +212,7 @@ class GpuMultinomial(Multinomial):
if (nb_multi
%%
nb_threads == 0)
if (nb_multi
%%
nb_threads == 0)
nb_blocks = nb_multi/nb_threads;
nb_blocks = nb_multi/nb_threads;
else
else
nb_blocks = (int)((float)nb_multi/(float)nb_threads + 1.);
nb_blocks = (int)((float)nb_multi/(float)nb_threads + 1.);
} while (nb_blocks > max_nb_blocks);
} while (nb_blocks > max_nb_blocks);
//printf("
\\
nN=
%%
i b=
%%
i t=
%%
i t*b=
%%
i", nb_multi, nb_blocks, nb_threads, nb_blocks*nb_threads);
//printf("
\\
nN=
%%
i b=
%%
i t=
%%
i t*b=
%%
i", nb_multi, nb_blocks, nb_threads, nb_blocks*nb_threads);
...
@@ -224,7 +224,7 @@ class GpuMultinomial(Multinomial):
...
@@ -224,7 +224,7 @@ class GpuMultinomial(Multinomial):
%(fail)
s;
%(fail)
s;
}
}
dim3 n_blocks(nb_blocks,1,1);
dim3 n_blocks(nb_blocks,1,1);
dim3 n_threads(nb_threads,1,1);
dim3 n_threads(nb_threads,1,1);
int n_shared = 0;
int n_shared = 0;
...
@@ -240,7 +240,7 @@ class GpuMultinomial(Multinomial):
...
@@ -240,7 +240,7 @@ class GpuMultinomial(Multinomial):
);
);
CNDA_THREAD_SYNC;
CNDA_THREAD_SYNC;
cudaError_t sts = cudaGetLastError();
cudaError_t sts = cudaGetLastError();
if (cudaSuccess != sts)
if (cudaSuccess != sts)
{
{
PyErr_Format(PyExc_RuntimeError, "Cuda error:
%%
s:
%%
s. (grid:
%%
i x
%%
i; block:
%%
i x
%%
i x
%%
i; shared:
%%
i)
\\
n",
PyErr_Format(PyExc_RuntimeError, "Cuda error:
%%
s:
%%
s. (grid:
%%
i x
%%
i; block:
%%
i x
%%
i x
%%
i; shared:
%%
i)
\\
n",
"k_multi_warp_
%(name)
s",
"k_multi_warp_
%(name)
s",
...
@@ -264,4 +264,4 @@ def use_gpu_multinomial(node):
...
@@ -264,4 +264,4 @@ def use_gpu_multinomial(node):
return
[
host_from_gpu
(
gpu_multinomial
(
*
[
gpu_from_host
(
i
)
for
i
in
node
.
inputs
]))]
return
[
host_from_gpu
(
gpu_multinomial
(
*
[
gpu_from_host
(
i
)
for
i
in
node
.
inputs
]))]
if
cuda_enabled
:
#theano.config.device.startswith('gpu'):
if
cuda_enabled
:
#theano.config.device.startswith('gpu'):
register_specialize
(
use_gpu_multinomial
)
register_specialize
(
use_gpu_multinomial
)
theano/sandbox/neighbourhoods.py
浏览文件 @
032c5b14
...
@@ -7,7 +7,7 @@ import numpy
...
@@ -7,7 +7,7 @@ import numpy
import
__builtin__
import
__builtin__
class
NeighbourhoodsFromImages
(
Op
):
class
NeighbourhoodsFromImages
(
Op
):
def
__init__
(
self
,
n_dims_before
,
dims_neighbourhoods
,
def
__init__
(
self
,
n_dims_before
,
dims_neighbourhoods
,
strides
=
None
,
ignore_border
=
False
,
inverse
=
False
):
strides
=
None
,
ignore_border
=
False
,
inverse
=
False
):
"""
"""
This extracts neighbourhoods from "images", but in a
This extracts neighbourhoods from "images", but in a
...
@@ -65,10 +65,10 @@ class NeighbourhoodsFromImages(Op):
...
@@ -65,10 +65,10 @@ class NeighbourhoodsFromImages(Op):
"""
"""
self
.
n_dims_before
=
n_dims_before
self
.
n_dims_before
=
n_dims_before
self
.
dims_neighbourhoods
=
dims_neighbourhoods
self
.
dims_neighbourhoods
=
dims_neighbourhoods
if
not
strides
is
None
:
if
not
strides
is
None
:
self
.
strides
=
strides
self
.
strides
=
strides
else
:
else
:
self
.
strides
=
dims_neighbourhoods
self
.
strides
=
dims_neighbourhoods
self
.
ignore_border
=
ignore_border
self
.
ignore_border
=
ignore_border
self
.
inverse
=
inverse
self
.
inverse
=
inverse
...
@@ -99,7 +99,7 @@ class NeighbourhoodsFromImages(Op):
...
@@ -99,7 +99,7 @@ class NeighbourhoodsFromImages(Op):
def
__str__
(
self
):
def
__str__
(
self
):
return
'
%
s{
%
s,
%
s,
%
s,
%
s}'
%
\
return
'
%
s{
%
s,
%
s,
%
s,
%
s}'
%
\
(
self
.
__class__
.
__name__
,
(
self
.
__class__
.
__name__
,
self
.
n_dims_before
,
self
.
n_dims_before
,
self
.
dims_neighbourhoods
,
self
.
dims_neighbourhoods
,
self
.
strides
,
self
.
strides
,
...
@@ -135,7 +135,7 @@ class NeighbourhoodsFromImages(Op):
...
@@ -135,7 +135,7 @@ class NeighbourhoodsFromImages(Op):
# the number of strides performed by NeighFromImg is
# the number of strides performed by NeighFromImg is
# directly given by this shape
# directly given by this shape
num_strides
.
append
(
output_shape
[
self
.
n_dims_before
+
i
])
num_strides
.
append
(
output_shape
[
self
.
n_dims_before
+
i
])
# our Op's output image must be at least this wide
# our Op's output image must be at least this wide
at_least_width
=
num_strides
[
i
]
*
self
.
strides
[
i
]
at_least_width
=
num_strides
[
i
]
*
self
.
strides
[
i
]
...
@@ -231,7 +231,7 @@ class NeighbourhoodsFromImages(Op):
...
@@ -231,7 +231,7 @@ class NeighbourhoodsFromImages(Op):
(
"for neigh_idx_
%
d in xrange(min(max_neigh_idx_
%
d,"
\
(
"for neigh_idx_
%
d in xrange(min(max_neigh_idx_
%
d,"
\
+
" self.dims_neighbourhoods[
%
d])):
\n
"
)
%
\
+
" self.dims_neighbourhoods[
%
d])):
\n
"
)
%
\
(
inner_dim_no
,
inner_dim_no
,
inner_dim_no
)
(
inner_dim_no
,
inner_dim_no
,
inner_dim_no
)
return
code_before
return
code_before
def
_py_flattened_idx
(
self
):
def
_py_flattened_idx
(
self
):
...
@@ -268,8 +268,8 @@ class NeighbourhoodsFromImages(Op):
...
@@ -268,8 +268,8 @@ class NeighbourhoodsFromImages(Op):
class
ImagesFromNeighbourhoods
(
NeighbourhoodsFromImages
):
class
ImagesFromNeighbourhoods
(
NeighbourhoodsFromImages
):
def
__init__
(
self
,
n_dims_before
,
dims_neighbourhoods
,
def
__init__
(
self
,
n_dims_before
,
dims_neighbourhoods
,
strides
=
None
,
ignore_border
=
False
):
strides
=
None
,
ignore_border
=
False
):
NeighbourhoodsFromImages
.
__init__
(
self
,
n_dims_before
,
dims_neighbourhoods
,
NeighbourhoodsFromImages
.
__init__
(
self
,
n_dims_before
,
dims_neighbourhoods
,
strides
=
strides
,
ignore_border
=
ignore_border
,
strides
=
strides
,
ignore_border
=
ignore_border
,
inverse
=
True
)
inverse
=
True
)
# and that's all there is to it
# and that's all there is to it
theano/sandbox/neighbours.py
浏览文件 @
032c5b14
...
@@ -88,7 +88,7 @@ class Images2Neibs(Op):
...
@@ -88,7 +88,7 @@ class Images2Neibs(Op):
PyErr_Format(PyExc_TypeError, "neib_step wrong step ; has to contain 2 elements");
PyErr_Format(PyExc_TypeError, "neib_step wrong step ; has to contain 2 elements");
%(fail)
s;
%(fail)
s;
}
}
// (c,d) = neib_shape
// (c,d) = neib_shape
const npy_intp c = (npy_intp) *(dtype_
%(neib_shape)
s*) PyArray_GETPTR1(
%(neib_shape)
s, 0);
const npy_intp c = (npy_intp) *(dtype_
%(neib_shape)
s*) PyArray_GETPTR1(
%(neib_shape)
s, 0);
const npy_intp d = (npy_intp) *(dtype_
%(neib_shape)
s*) PyArray_GETPTR1(
%(neib_shape)
s, 1);
const npy_intp d = (npy_intp) *(dtype_
%(neib_shape)
s*) PyArray_GETPTR1(
%(neib_shape)
s, 1);
...
@@ -137,7 +137,7 @@ class Images2Neibs(Op):
...
@@ -137,7 +137,7 @@ class Images2Neibs(Op):
* grid_d
* grid_d
* (
%(ten4)
s->dimensions)[1]
* (
%(ten4)
s->dimensions)[1]
* (
%(ten4)
s->dimensions)[0];
* (
%(ten4)
s->dimensions)[0];
if ((NULL ==
%(z)
s)
if ((NULL ==
%(z)
s)
|| ((
%(z)
s->dimensions)[0] != z_dim0 )
|| ((
%(z)
s->dimensions)[0] != z_dim0 )
|| ((
%(z)
s->dimensions)[1] != z_dim1 )
|| ((
%(z)
s->dimensions)[1] != z_dim1 )
...
@@ -147,12 +147,12 @@ class Images2Neibs(Op):
...
@@ -147,12 +147,12 @@ class Images2Neibs(Op):
npy_intp dims[2];
npy_intp dims[2];
dims[0] = z_dim0;
dims[0] = z_dim0;
dims[1] = z_dim1;
dims[1] = z_dim1;
%(z)
s = (PyArrayObject*) PyArray_EMPTY(2,
%(z)
s = (PyArrayObject*) PyArray_EMPTY(2,
dims,
dims,
type_num_
%(ten4)
s,
type_num_
%(ten4)
s,
0);
0);
if (!
%(z)
s)
if (!
%(z)
s)
{
{
PyErr_SetString(PyExc_MemoryError, "failed to alloc z output");
PyErr_SetString(PyExc_MemoryError, "failed to alloc z output");
...
@@ -162,12 +162,12 @@ class Images2Neibs(Op):
...
@@ -162,12 +162,12 @@ class Images2Neibs(Op):
}
}
{ // NESTED SCOPE
{ // NESTED SCOPE
const int nb_batch = (
%(ten4)
s->dimensions)[0];
const int nb_batch = (
%(ten4)
s->dimensions)[0];
const int nb_stack = (
%(ten4)
s->dimensions)[1];
const int nb_stack = (
%(ten4)
s->dimensions)[1];
const int height = (
%(ten4)
s->dimensions)[2];
const int height = (
%(ten4)
s->dimensions)[2];
const int width = (
%(ten4)
s->dimensions)[3];
const int width = (
%(ten4)
s->dimensions)[3];
// (c,d) = neib_shape
// (c,d) = neib_shape
const npy_intp c = (npy_intp) *(dtype_
%(neib_shape)
s*) PyArray_GETPTR1(
%(neib_shape)
s, 0);
const npy_intp c = (npy_intp) *(dtype_
%(neib_shape)
s*) PyArray_GETPTR1(
%(neib_shape)
s, 0);
const npy_intp d = (npy_intp) *(dtype_
%(neib_shape)
s*) PyArray_GETPTR1(
%(neib_shape)
s, 1);
const npy_intp d = (npy_intp) *(dtype_
%(neib_shape)
s*) PyArray_GETPTR1(
%(neib_shape)
s, 1);
...
@@ -177,7 +177,7 @@ class Images2Neibs(Op):
...
@@ -177,7 +177,7 @@ class Images2Neibs(Op):
const int wrap_centered_idx_shift_x = c/2;
const int wrap_centered_idx_shift_x = c/2;
const int wrap_centered_idx_shift_y = d/2;
const int wrap_centered_idx_shift_y = d/2;
// Oh this is messed up...
// Oh this is messed up...
for (int n = 0; n < nb_batch; n++) // loop over batches
for (int n = 0; n < nb_batch; n++) // loop over batches
for (int s = 0; s < nb_stack; s++) // loop over stacks
for (int s = 0; s < nb_stack; s++) // loop over stacks
for (int a = 0; a < grid_c; a++) // loop over the number of patch in height
for (int a = 0; a < grid_c; a++) // loop over the number of patch in height
...
@@ -194,18 +194,18 @@ class Images2Neibs(Op):
...
@@ -194,18 +194,18 @@ class Images2Neibs(Op):
}
}
for (int j = 0; j < d; j++) // loop over d
for (int j = 0; j < d; j++) // loop over d
{
{
int ten4_3 = j + b * step_y;
int ten4_3 = j + b * step_y;
if ( "
%(mode)
s" == "wrap_centered" ){
if ( "
%(mode)
s" == "wrap_centered" ){
ten4_3 -= wrap_centered_idx_shift_y;
ten4_3 -= wrap_centered_idx_shift_y;
if ( ten4_3 < 0 ) ten4_3 += width;
if ( ten4_3 < 0 ) ten4_3 += width;
else if (ten4_3 >= width) ten4_3 -= width;
else if (ten4_3 >= width) ten4_3 -= width;
}
}
int z_col = j + d * i;
int z_col = j + d * i;
dtype_
%(z)
s* curr_z = (dtype_
%(z)
s*) PyArray_GETPTR2(
%(z)
s, z_row, z_col);
dtype_
%(z)
s* curr_z = (dtype_
%(z)
s*) PyArray_GETPTR2(
%(z)
s, z_row, z_col);
*curr_z = *( (dtype_
%(ten4)
s*) PyArray_GETPTR4(
%(ten4)
s, n, s, ten4_2, ten4_3));
*curr_z = *( (dtype_
%(ten4)
s*) PyArray_GETPTR4(
%(ten4)
s, n, s, ten4_2, ten4_3));
//printf("
\\
n(
%%
i,
%%
i,
%%
i,
%%
i) --> (
%%
i,
%%
i)",n,s, ten4_2, ten4_3, z_row, z_col);
//printf("
\\
n(
%%
i,
%%
i,
%%
i,
%%
i) --> (
%%
i,
%%
i)",n,s, ten4_2, ten4_3, z_row, z_col);
//printf("
%%
f ", *curr_z);
//printf("
%%
f ", *curr_z);
}
}
...
@@ -220,22 +220,22 @@ def images2neibs(ten4, neib_shape, neib_step=None, mode='valid'):
...
@@ -220,22 +220,22 @@ def images2neibs(ten4, neib_shape, neib_step=None, mode='valid'):
def
neibs2images
(
neibs
,
neib_shape
,
original_shape
):
def
neibs2images
(
neibs
,
neib_shape
,
original_shape
):
"""
"""
Inverse of images2neib.
Inverse of images2neib.
neibs : matrix like the one obtained by images2neib
neibs : matrix like the one obtained by images2neib
neib_shape : neib_shape that was used in images2neib
neib_shape : neib_shape that was used in images2neib
original_shape : original shape of the 4d tensor given to images2neib
original_shape : original shape of the 4d tensor given to images2neib
Return a 4d tensor of shape `original_shape`.
Return a 4d tensor of shape `original_shape`.
"""
"""
neibs
=
T
.
as_tensor_variable
(
neibs
)
neibs
=
T
.
as_tensor_variable
(
neibs
)
neib_shape
=
T
.
as_tensor_variable
(
neib_shape
)
neib_shape
=
T
.
as_tensor_variable
(
neib_shape
)
original_shape
=
T
.
as_tensor_variable
(
original_shape
)
original_shape
=
T
.
as_tensor_variable
(
original_shape
)
new_neib_shape
=
T
.
stack
(
original_shape
[
-
1
]
/
neib_shape
[
1
],
neib_shape
[
1
]
)
new_neib_shape
=
T
.
stack
(
original_shape
[
-
1
]
/
neib_shape
[
1
],
neib_shape
[
1
]
)
return
images2neibs
(
neibs
.
dimshuffle
(
'x'
,
'x'
,
0
,
1
),
new_neib_shape
)
.
reshape
(
original_shape
)
return
images2neibs
(
neibs
.
dimshuffle
(
'x'
,
'x'
,
0
,
1
),
new_neib_shape
)
.
reshape
(
original_shape
)
#return images2neibs(neibs.reshape((1,1,neibs.shape[0],neibs.shape[1])), new_neib_shape).reshape(original_shape)
#return images2neibs(neibs.reshape((1,1,neibs.shape[0],neibs.shape[1])), new_neib_shape).reshape(original_shape)
# This is work in progress
# This is work in progress
class
GpuImages2Neibs
(
Images2Neibs
):
class
GpuImages2Neibs
(
Images2Neibs
):
def
__init__
(
self
,
mode
=
'valid'
):
def
__init__
(
self
,
mode
=
'valid'
):
...
@@ -251,7 +251,7 @@ class GpuImages2Neibs(Images2Neibs):
...
@@ -251,7 +251,7 @@ class GpuImages2Neibs(Images2Neibs):
assert
ten4
.
ndim
==
4
assert
ten4
.
ndim
==
4
assert
neib_shape
.
ndim
==
1
assert
neib_shape
.
ndim
==
1
assert
neib_step
.
ndim
==
1
assert
neib_step
.
ndim
==
1
return
Apply
(
self
,
[
ten4
,
neib_shape
,
neib_step
],
[
CudaNdarrayType
(
broadcastable
=
(
False
,
False
),
return
Apply
(
self
,
[
ten4
,
neib_shape
,
neib_step
],
[
CudaNdarrayType
(
broadcastable
=
(
False
,
False
),
dtype
=
ten4
.
type
.
dtype
)()])
dtype
=
ten4
.
type
.
dtype
)()])
...
@@ -313,8 +313,8 @@ class GpuImages2Neibs(Images2Neibs):
...
@@ -313,8 +313,8 @@ class GpuImages2Neibs(Images2Neibs):
}
}
//int ten4_idx = ten4_3 + width*(ten4_2 + height*(s +nb_stack*n));
//int ten4_idx = ten4_3 + width*(ten4_2 + height*(s +nb_stack*n));
//int ten4_idx = stride3*ten4_3 + stride2*(ten4_2 + stride1*(s + stride0*n));
//int ten4_idx = stride3*ten4_3 + stride2*(ten4_2 + stride1*(s + stride0*n));
int ten4_idx = stride3*ten4_3 + stride2*ten4_2 + stride1*s + stride0*n;
int ten4_idx = stride3*ten4_3 + stride2*ten4_2 + stride1*s + stride0*n;
int z_col = j + d * i;
int z_col = j + d * i;
int z_idx = z_col + c*d*z_row;
int z_idx = z_col + c*d*z_row;
...
@@ -375,8 +375,8 @@ class GpuImages2Neibs(Images2Neibs):
...
@@ -375,8 +375,8 @@ class GpuImages2Neibs(Images2Neibs):
}
}
//int ten4_idx = ten4_3 + width*(ten4_2 + height*(s +nb_stack*n));
//int ten4_idx = ten4_3 + width*(ten4_2 + height*(s +nb_stack*n));
//int ten4_idx = stride3*ten4_3 + stride2*(ten4_2 + stride1*(s + stride0*n));
//int ten4_idx = stride3*ten4_3 + stride2*(ten4_2 + stride1*(s + stride0*n));
int ten4_idx = stride3*ten4_3 + stride2*ten4_2 + stride1*s + stride0*n;
int ten4_idx = stride3*ten4_3 + stride2*ten4_2 + stride1*s + stride0*n;
int z_col = j + d * i;
int z_col = j + d * i;
int z_idx = z_col + c*d*z_row;
int z_idx = z_col + c*d*z_row;
...
@@ -406,7 +406,7 @@ class GpuImages2Neibs(Images2Neibs):
...
@@ -406,7 +406,7 @@ class GpuImages2Neibs(Images2Neibs):
PyErr_Format(PyExc_TypeError, "unis wrong rank");
PyErr_Format(PyExc_TypeError, "unis wrong rank");
%(fail)
s;
%(fail)
s;
}
}
if (
%(neib_shape)
s->dimensions[0] != 2)
if (
%(neib_shape)
s->dimensions[0] != 2)
{
{
PyErr_Format(PyExc_ValueError, "neib_shape has to contain two elements");
PyErr_Format(PyExc_ValueError, "neib_shape has to contain two elements");
...
@@ -459,7 +459,7 @@ class GpuImages2Neibs(Images2Neibs):
...
@@ -459,7 +459,7 @@ class GpuImages2Neibs(Images2Neibs):
* grid_d
* grid_d
* CudaNdarray_HOST_DIMS(
%(ten4)
s)[1]
* CudaNdarray_HOST_DIMS(
%(ten4)
s)[1]
* CudaNdarray_HOST_DIMS(
%(ten4)
s)[0];
* CudaNdarray_HOST_DIMS(
%(ten4)
s)[0];
if ((NULL ==
%(z)
s)
if ((NULL ==
%(z)
s)
|| (CudaNdarray_HOST_DIMS(
%(z)
s)[0] != z_dim0)
|| (CudaNdarray_HOST_DIMS(
%(z)
s)[0] != z_dim0)
|| (CudaNdarray_HOST_DIMS(
%(z)
s)[1] != z_dim1))
|| (CudaNdarray_HOST_DIMS(
%(z)
s)[1] != z_dim1))
...
@@ -475,11 +475,11 @@ class GpuImages2Neibs(Images2Neibs):
...
@@ -475,11 +475,11 @@ class GpuImages2Neibs(Images2Neibs):
%(fail)
s;
%(fail)
s;
}
}
}
}
}
}
{ // NESTED SCOPE
{ // NESTED SCOPE
const int nb_batch = CudaNdarray_HOST_DIMS(
%(ten4)
s)[0];
const int nb_batch = CudaNdarray_HOST_DIMS(
%(ten4)
s)[0];
const int nb_stack = CudaNdarray_HOST_DIMS(
%(ten4)
s)[1];
const int nb_stack = CudaNdarray_HOST_DIMS(
%(ten4)
s)[1];
const int height = CudaNdarray_HOST_DIMS(
%(ten4)
s)[2];
const int height = CudaNdarray_HOST_DIMS(
%(ten4)
s)[2];
...
@@ -489,11 +489,11 @@ class GpuImages2Neibs(Images2Neibs):
...
@@ -489,11 +489,11 @@ class GpuImages2Neibs(Images2Neibs):
const int d = *(dtype_
%(neib_shape)
s*) PyArray_GETPTR1(
%(neib_shape)
s, 1);
const int d = *(dtype_
%(neib_shape)
s*) PyArray_GETPTR1(
%(neib_shape)
s, 1);
const npy_intp step_x = (npy_intp) *(dtype_
%(neib_step)
s*) PyArray_GETPTR1(
%(neib_step)
s, 0);
const npy_intp step_x = (npy_intp) *(dtype_
%(neib_step)
s*) PyArray_GETPTR1(
%(neib_step)
s, 0);
const npy_intp step_y = (npy_intp) *(dtype_
%(neib_step)
s*) PyArray_GETPTR1(
%(neib_step)
s, 1);
const npy_intp step_y = (npy_intp) *(dtype_
%(neib_step)
s*) PyArray_GETPTR1(
%(neib_step)
s, 1);
dim3 n_threads(d,c,1);
dim3 n_threads(d,c,1);
//Their is a max of 512 threads per blocks
//Their is a max of 512 threads per blocks
while(n_threads.x*n_threads.y>512 && n_threads.y>1)n_threads.y--;
while(n_threads.x*n_threads.y>512 && n_threads.y>1)n_threads.y--;
while(n_threads.x*n_threads.y>512 && n_threads.x>1)n_threads.x--;
while(n_threads.x*n_threads.y>512 && n_threads.x>1)n_threads.x--;
//Make bigger block to have better memory access pattern and a higher core utilisation.
//Make bigger block to have better memory access pattern and a higher core utilisation.
//for smaller patch size
//for smaller patch size
...
@@ -519,7 +519,7 @@ class GpuImages2Neibs(Images2Neibs):
...
@@ -519,7 +519,7 @@ class GpuImages2Neibs(Images2Neibs):
f = k_multi_warp_
%(name)
s;
f = k_multi_warp_
%(name)
s;
}
}
f<<<n_blocks, n_threads, n_shared>>>(
f<<<n_blocks, n_threads, n_shared>>>(
nb_batch,
nb_batch,
nb_stack,
nb_stack,
height, width,
height, width,
...
@@ -534,7 +534,7 @@ class GpuImages2Neibs(Images2Neibs):
...
@@ -534,7 +534,7 @@ class GpuImages2Neibs(Images2Neibs):
);
);
CNDA_THREAD_SYNC;
CNDA_THREAD_SYNC;
cudaError_t sts = cudaGetLastError();
cudaError_t sts = cudaGetLastError();
if (cudaSuccess != sts)
if (cudaSuccess != sts)
{
{
PyErr_Format(PyExc_RuntimeError, "Cuda error:
%%
s:
%%
s. (grid:
%%
i x
%%
i; block:
%%
i x
%%
i x
%%
i; shared:
%%
i)
\\
n",
PyErr_Format(PyExc_RuntimeError, "Cuda error:
%%
s:
%%
s. (grid:
%%
i x
%%
i; block:
%%
i x
%%
i x
%%
i; shared:
%%
i)
\\
n",
"k_multi_warp_
%(name)
s",
"k_multi_warp_
%(name)
s",
...
@@ -560,4 +560,4 @@ def use_gpu_images2neibs(node):
...
@@ -560,4 +560,4 @@ def use_gpu_images2neibs(node):
if
cuda_available
:
if
cuda_available
:
register_gpu_opt
()(
use_gpu_images2neibs
)
register_gpu_opt
()(
use_gpu_images2neibs
)
theano/sparse/sandbox/truedot.py
浏览文件 @
032c5b14
...
@@ -60,7 +60,7 @@ class TrueDot(gof.op.Op):
...
@@ -60,7 +60,7 @@ class TrueDot(gof.op.Op):
if
self
.
grad_preserves_dense
:
if
self
.
grad_preserves_dense
:
rval
[
1
]
=
dense_from_sparse
(
rval
[
1
])
rval
[
1
]
=
dense_from_sparse
(
rval
[
1
])
return
rval
return
rval
def
true_dot
(
x
,
y
,
grad_preserves_dense
=
True
):
def
true_dot
(
x
,
y
,
grad_preserves_dense
=
True
):
"""
"""
@todo: Maybe the triple-transposition formulation (when x is dense)
@todo: Maybe the triple-transposition formulation (when x is dense)
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论