Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
be0902fb
提交
be0902fb
authored
4月 01, 2010
作者:
Razvan Pascanu
浏览文件
操作
浏览文件
下载
差异文件
merge
上级
0fe8b8f8
223cdae9
隐藏空白字符变更
内嵌
并排
正在显示
13 个修改的文件
包含
223 行增加
和
38 行删除
+223
-38
README.txt
README.txt
+3
-5
install.txt
doc/install.txt
+7
-0
debugmode.py
theano/compile/debugmode.py
+2
-2
cutils.py
theano/gof/cutils.py
+1
-1
test_nnet.py
theano/sandbox/cuda/tests/test_nnet.py
+1
-1
rng_mrg.py
theano/sandbox/rng_mrg.py
+119
-0
basic.py
theano/scalar/basic.py
+8
-1
blas.py
theano/tensor/blas.py
+31
-7
conv.py
theano/tensor/nnet/conv.py
+9
-5
speed_test_conv.py
theano/tensor/nnet/tests/speed_test_conv.py
+12
-8
test_conv.py
theano/tensor/nnet/tests/test_conv.py
+14
-4
opt.py
theano/tensor/opt.py
+10
-2
test_blas.py
theano/tensor/tests/test_blas.py
+6
-2
没有找到文件。
README.txt
浏览文件 @
be0902fb
To install the package,
us
e:
To install the package,
see this pag
e:
python setup.py build
http://deeplearning.net/software/theano/install.html#install
python setup.py test
python setup.py install
For the documentation, see the project website:
For the documentation, see the project website:
http://pylearn.org
/theano/
http://deeplearning.net/software
/theano/
We recommend you look at the documentation on the website, since it
We recommend you look at the documentation on the website, since it
will be more current than the documentation included with the package.
will be more current than the documentation included with the package.
...
...
doc/install.txt
浏览文件 @
be0902fb
...
@@ -333,6 +333,13 @@ but this has not been tested yet.
...
@@ -333,6 +333,13 @@ but this has not been tested yet.
cp libblas.dll /mingw/lib
cp libblas.dll /mingw/lib
mv libblas.dll /mingw/bin
mv libblas.dll /mingw/bin
- Edit (or create) your ``$HOME/.theanorc`` and add the following section:
.. code-block:: bash
[blas]
ldflags = -lblas
- Install `Mercurial <http://mercurial.selenic.com/downloads/>`__
- Install `Mercurial <http://mercurial.selenic.com/downloads/>`__
(you can use the regular Windows release, you do not need TortoiseHg).
(you can use the regular Windows release, you do not need TortoiseHg).
...
...
theano/compile/debugmode.py
浏览文件 @
be0902fb
...
@@ -936,7 +936,7 @@ class _Linker(gof.link.LocalLinker):
...
@@ -936,7 +936,7 @@ class _Linker(gof.link.LocalLinker):
except
(
NotImplementedError
,
utils
.
MethodNotDefined
):
except
(
NotImplementedError
,
utils
.
MethodNotDefined
):
thunks_c
.
append
(
None
)
thunks_c
.
append
(
None
)
if
self
.
maker
.
mode
.
check_py_code
:
if
self
.
maker
.
mode
.
check_py_code
or
thunks_c
[
-
1
]
is
None
:
p
=
node
.
op
.
perform
p
=
node
.
op
.
perform
thunk
=
(
lambda
p
=
p
,
i
=
node_input_storage
,
o
=
node_output_storage
,
n
=
thunk
=
(
lambda
p
=
p
,
i
=
node_input_storage
,
o
=
node_output_storage
,
n
=
node
:
p
(
n
,
[
x
[
0
]
for
x
in
i
],
o
))
node
:
p
(
n
,
[
x
[
0
]
for
x
in
i
],
o
))
...
@@ -1455,7 +1455,7 @@ class DebugMode(Mode):
...
@@ -1455,7 +1455,7 @@ class DebugMode(Mode):
check_py_code
=
config
.
DebugMode
.
check_py
check_py_code
=
config
.
DebugMode
.
check_py
"""
"""
Should we evaluate (and check) the `perform` implementations?
Should we evaluate (and check) the `perform` implementations?
Always checked if no `c_code`.
"""
"""
check_isfinite
=
config
.
DebugMode
.
check_finite
check_isfinite
=
config
.
DebugMode
.
check_finite
...
...
theano/gof/cutils.py
浏览文件 @
be0902fb
...
@@ -33,7 +33,7 @@ run_cthunk(PyObject *self, PyObject *args)
...
@@ -33,7 +33,7 @@ run_cthunk(PyObject *self, PyObject *args)
return NULL;
return NULL;
}
}
void * ptr_addr = PyCObject_AsVoidPtr(py_cthunk);
void * ptr_addr = PyCObject_AsVoidPtr(py_cthunk);
int (*fn)(void*) =
reinterpret_cast<int (*)(void*)>
(ptr_addr);
int (*fn)(void*) =
(int (*)(void*))
(ptr_addr);
void* it = PyCObject_GetDesc(py_cthunk);
void* it = PyCObject_GetDesc(py_cthunk);
int failure = fn(it);
int failure = fn(it);
...
...
theano/sandbox/cuda/tests/test_nnet.py
浏览文件 @
be0902fb
...
@@ -12,10 +12,10 @@ import theano.tensor.signal.downsample as downsample
...
@@ -12,10 +12,10 @@ import theano.tensor.signal.downsample as downsample
import
numpy
import
numpy
raise
SkipTest
(
'SKIP TO MAKE THE BUILDBOT DON
\'
T CRASH. THEIR IS A DIFFICULT BUG TO FIX WITH MEMORY LEAK AND/OR WHEN Cuda_Ndarray alloc fail!'
)
# Skip test if cuda_ndarray is not available.
# Skip test if cuda_ndarray is not available.
from
nose.plugins.skip
import
SkipTest
from
nose.plugins.skip
import
SkipTest
raise
SkipTest
(
'SKIP TO PREVENT THE BUILDBOT FROM CRASHING. THERE IS A DIFFICULT BUG TO FIX WITH MEMORY LEAK AND/OR WHEN Cuda_Ndarray alloc fail!'
)
import
theano.sandbox.cuda
as
cuda_ndarray
import
theano.sandbox.cuda
as
cuda_ndarray
if
cuda_ndarray
.
cuda_available
==
False
:
if
cuda_ndarray
.
cuda_available
==
False
:
raise
SkipTest
(
'Optional package cuda disabled'
)
raise
SkipTest
(
'Optional package cuda disabled'
)
...
...
theano/sandbox/rng_mrg.py
浏览文件 @
be0902fb
...
@@ -10,6 +10,7 @@ import numpy
...
@@ -10,6 +10,7 @@ import numpy
from
theano
import
Op
,
Apply
,
shared
,
config
from
theano
import
Op
,
Apply
,
shared
,
config
from
theano.tensor
import
raw_random
,
TensorType
,
as_tensor_variable
,
get_vector_length
,
cast
,
opt
from
theano.tensor
import
raw_random
,
TensorType
,
as_tensor_variable
,
get_vector_length
,
cast
,
opt
from
theano.tensor
import
zeros_like
,
sqrt
,
log
,
sin
,
cos
,
join
from
theano.compile
import
optdb
from
theano.compile
import
optdb
from
theano.gof
import
local_optimizer
from
theano.gof
import
local_optimizer
...
@@ -650,6 +651,49 @@ class MRG_RandomStreams(object):
...
@@ -650,6 +651,49 @@ class MRG_RandomStreams(object):
else
:
else
:
raise
NotImplementedError
(
"MRG_RandomStreams.binomial with n > 1"
)
raise
NotImplementedError
(
"MRG_RandomStreams.binomial with n > 1"
)
def
normal
(
self
,
size
=
None
,
avg
=
0.0
,
std
=
1.0
,
ndim
=
None
,
dtype
=
config
.
floatX
):
# We need an even number of ]0,1[ samples. Then we split them
# in two halves. First half becomes our U1's for Box-Muller,
# second half our U2's. See Wikipedia page:
# http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform
n_samples
=
self
.
n_streams
(
size
)
evened
=
False
if
n_samples
%
2
==
1
:
n_samples
+=
1
evened
=
True
flattened
=
self
.
uniform
(
size
=
(
n_samples
,),
dtype
=
dtype
)
U1
=
flattened
[:
n_samples
/
2
]
U2
=
flattened
[
n_samples
/
2
:]
#normal_samples = zeros_like(flattened)
sqrt_ln_U1
=
sqrt
(
-
2.0
*
log
(
U1
))
# TypeError: 'TensorVariable' object does not support item assignment
# so this doesn't work...
#normal_samples[:n_samples/2] = sqrt_ln_U1 * cos(2.0*numpy.pi*U2)
#normal_samples[n_samples/2:] = sqrt_ln_U1 * sin(2.0*numpy.pi*U2)
# so trying this instead
first_half
=
sqrt_ln_U1
*
cos
(
2.0
*
numpy
.
pi
*
U2
)
second_half
=
sqrt_ln_U1
*
sin
(
2.0
*
numpy
.
pi
*
U2
)
normal_samples
=
join
(
0
,
first_half
,
second_half
)
final_samples
=
None
if
evened
:
final_samples
=
normal_samples
[:
-
1
]
else
:
final_samples
=
normal_samples
final_samples
=
avg
+
std
*
final_samples
if
size
:
final_samples
=
final_samples
.
reshape
(
size
)
return
final_samples
@local_optimizer
([
None
])
@local_optimizer
([
None
])
def
mrg_random_make_inplace
(
node
):
def
mrg_random_make_inplace
(
node
):
op
=
node
.
op
op
=
node
.
op
...
@@ -734,3 +778,78 @@ def test_rng0():
...
@@ -734,3 +778,78 @@ def test_rng0():
basictest
(
ff
,
1000
,
prefix
=
'numpy'
)
basictest
(
ff
,
1000
,
prefix
=
'numpy'
)
def
test_normal0
():
def
basictest
(
f
,
steps
,
target_avg
,
target_std
,
prefix
=
""
):
dt
=
0.0
avg_std
=
0.0
for
i
in
xrange
(
steps
):
t0
=
time
.
time
()
ival
=
f
()
dt
+=
time
.
time
()
-
t0
ival
=
numpy
.
asarray
(
ival
)
if
i
==
0
:
mean
=
numpy
.
array
(
ival
,
copy
=
True
)
avg_std
=
numpy
.
std
(
ival
)
else
:
alpha
=
1.0
/
(
1
+
i
)
mean
=
alpha
*
ival
+
(
1
-
alpha
)
*
mean
avg_std
=
alpha
*
numpy
.
std
(
ival
)
+
(
1
-
alpha
)
*
avg_std
print
prefix
,
'mean'
,
numpy
.
mean
(
mean
)
assert
abs
(
numpy
.
mean
(
mean
)
-
target_avg
)
<
.
01
,
'bad mean?'
print
prefix
,
'std'
,
avg_std
assert
abs
(
avg_std
-
target_std
)
<
.
01
,
'bad std?'
print
prefix
,
'time'
,
dt
print
prefix
,
'elements'
,
steps
*
sample_size
[
0
]
*
sample_size
[
1
]
print
prefix
,
'samples/sec'
,
steps
*
sample_size
[
0
]
*
sample_size
[
1
]
/
dt
sample_size
=
(
999
,
100
)
print
''
print
'ON CPU:'
R
=
MRG_RandomStreams
(
234
,
use_cuda
=
False
)
n
=
R
.
normal
(
size
=
sample_size
,
avg
=-
5.0
,
std
=
2.0
)
f
=
theano
.
function
([],
n
)
theano
.
printing
.
debugprint
(
f
)
print
'random?[:10]
\n
'
,
f
()[
0
,
0
:
10
]
basictest
(
f
,
50
,
-
5.0
,
2.0
,
prefix
=
'mrg '
)
sys
.
stdout
.
flush
()
# now with odd number of samples
sample_size
=
(
999
,
99
)
print
''
print
'ON GPU:'
R
=
MRG_RandomStreams
(
234
,
use_cuda
=
True
)
n
=
R
.
normal
(
size
=
sample_size
,
avg
=-
5.0
,
std
=
2.0
,
dtype
=
'float32'
)
assert
n
.
dtype
==
'float32'
#well, it's really that this test w GPU doesn't make sense otw
f
=
theano
.
function
([],
theano
.
Out
(
theano
.
sandbox
.
cuda
.
basic_ops
.
gpu_from_host
(
n
),
borrow
=
True
))
theano
.
printing
.
debugprint
(
f
)
print
'random?[:10]
\n
'
,
numpy
.
asarray
(
f
())[
0
,
0
:
10
]
basictest
(
f
,
50
,
-
5.0
,
2.0
,
prefix
=
'gpu mrg '
)
sys
.
stdout
.
flush
()
print
''
print
'ON CPU w NUMPY:'
RR
=
theano
.
tensor
.
shared_randomstreams
.
RandomStreams
(
234
)
nn
=
RR
.
normal
(
size
=
sample_size
,
avg
=-
5.0
,
std
=
2.0
)
ff
=
theano
.
function
([],
nn
)
basictest
(
ff
,
50
,
-
5.0
,
2.0
,
prefix
=
'numpy '
)
#if __name__ == '__main__':
# # with: export THEANO_FLAGS=device=gpu0,floatX=float32
# test_normal0()
theano/scalar/basic.py
浏览文件 @
be0902fb
...
@@ -1414,12 +1414,16 @@ class Composite(ScalarOp):
...
@@ -1414,12 +1414,16 @@ class Composite(ScalarOp):
name
=
"V
%%(id)
s_tmp
%
i"
%
i
name
=
"V
%%(id)
s_tmp
%
i"
%
i
subd
[
output
]
=
name
subd
[
output
]
=
name
_c_code
+=
"
%
s
%
s;
\n
"
%
(
output
.
type
.
dtype_specs
()[
1
],
name
)
_c_code
+=
"
%
s
%
s;
\n
"
%
(
output
.
type
.
dtype_specs
()[
1
],
name
)
_c_code
+=
node
.
op
.
c_code
(
node
,
s
=
node
.
op
.
c_code
(
node
,
"
%(name)
s"
,
"
%(name)
s"
,
[
subd
[
input
]
for
input
in
node
.
inputs
],
[
subd
[
input
]
for
input
in
node
.
inputs
],
[
subd
[
output
]
for
output
in
node
.
outputs
],
[
subd
[
output
]
for
output
in
node
.
outputs
],
dict
(
fail
=
"
%(fail)
s"
,
dict
(
fail
=
"
%(fail)
s"
,
id
=
"
%%(id)
s_
%
i"
%
j
))
id
=
"
%%(id)
s_
%
i"
%
j
))
if
any
([
isinstance
(
x
.
op
,
Mod
)
for
x
in
env
.
toposort
()]):
s
=
s
.
replace
(
'
%
'
,
'
%%
'
)
_c_code
+=
s
_c_code
+=
"
\n
"
_c_code
+=
"
\n
"
_c_code
+=
"}
\n
"
_c_code
+=
"}
\n
"
...
@@ -1481,6 +1485,9 @@ class Composite(ScalarOp):
...
@@ -1481,6 +1485,9 @@ class Composite(ScalarOp):
return
self
.
_c_code
%
d
return
self
.
_c_code
%
d
def
c_code_cache_version
(
self
):
return
(
1
,)
+
tuple
([
x
.
op
.
c_code_cache_version
()
for
x
in
self
.
env
.
toposort
()])
def
__eq__
(
self
,
other
):
def
__eq__
(
self
,
other
):
if
self
is
other
:
return
True
if
self
is
other
:
return
True
if
not
isinstance
(
other
,
self
.
__class__
):
return
False
if
not
isinstance
(
other
,
self
.
__class__
):
return
False
...
...
theano/tensor/blas.py
浏览文件 @
be0902fb
...
@@ -100,18 +100,24 @@ class GemmRelated(Op):
...
@@ -100,18 +100,24 @@ class GemmRelated(Op):
#ifndef MOD
#ifndef MOD
#define MOD
%
#define MOD
%
#endif
#endif
static double time_time() // a time function like time.time()
{
struct timeval tv;
gettimeofday(&tv, 0);
return (double) tv.tv_sec + (double) tv.tv_usec / 1000000.0;
}
"""
"""
return
blas_header_text
()
+
mod_str
return
blas_header_text
()
+
mod_str
def
c_headers
(
self
):
def
c_headers
(
self
):
# std.cout doesn't require the '%' symbol to print stuff...
# std.cout doesn't require the '%' symbol to print stuff...
# so it works much better with python's string-substitution stuff.
# so it works much better with python's string-substitution stuff.
return
[
'<iostream>'
]
return
[
'<iostream>'
,
'<time.h>'
,
'<sys/time.h>'
]
def
c_libraries
(
self
):
def
c_libraries
(
self
):
return
ldflags
()
return
ldflags
()
def
c_code_cache_version
(
self
):
# code_cache_version is built by subclasses from
return
(
0
,
0
,
1
)
# build_gemm_version
def
c_compile_args
(
self
):
def
c_compile_args
(
self
):
return
ldflags
(
libs
=
False
,
flags
=
True
)
return
ldflags
(
libs
=
False
,
flags
=
True
)
...
@@ -247,6 +253,7 @@ class GemmRelated(Op):
...
@@ -247,6 +253,7 @@ class GemmRelated(Op):
char T = 'T';
char T = 'T';
int Nz0 = Nz[0], Nz1 = Nz[1], Nx1 = Nx[1];
int Nz0 = Nz[0], Nz1 = Nz[1], Nx1 = Nx[1];
//std::cerr << (unit/256) MOD 16 << (unit / 16) MOD 16 << unit MOD 16<< '
\\
n';
//std::cerr << (unit/256) MOD 16 << (unit / 16) MOD 16 << unit MOD 16<< '
\\
n';
//double t0 = time_time();
switch(unit)
switch(unit)
{
{
case 0x000: sgemm_(&N, &N, &Nz1, &Nz0, &Nx1, &a, y, &sy_0, x, &sx_0, &b, z, &sz_0); break;
case 0x000: sgemm_(&N, &N, &Nz1, &Nz0, &Nx1, &a, y, &sy_0, x, &sx_0, &b, z, &sz_0); break;
...
@@ -259,6 +266,7 @@ class GemmRelated(Op):
...
@@ -259,6 +266,7 @@ class GemmRelated(Op):
case 0x111: sgemm_(&N, &N, &Nz0, &Nz1, &Nx1, &a, x, &sx_1, y, &sy_1, &b, z, &sz_1); break;
case 0x111: sgemm_(&N, &N, &Nz0, &Nz1, &Nx1, &a, x, &sx_1, y, &sy_1, &b, z, &sz_1); break;
default: PyErr_SetString(PyExc_ValueError, "some matrix has no unit stride");
%(fail)
s;
default: PyErr_SetString(PyExc_ValueError, "some matrix has no unit stride");
%(fail)
s;
};
};
//fprintf(stderr, "Calling sgemm
%%
i
%%
i
%%
i
%%
i took
%%
f
\\
n", unit, Nz1, Nz0, Nx1, time_time() - t0);
"""
"""
case_double
=
"""
case_double
=
"""
...
@@ -278,6 +286,7 @@ class GemmRelated(Op):
...
@@ -278,6 +286,7 @@ class GemmRelated(Op):
char T = 'T';
char T = 'T';
int Nz0 = Nz[0], Nz1 = Nz[1], Nx1 = Nx[1];
int Nz0 = Nz[0], Nz1 = Nz[1], Nx1 = Nx[1];
//std::cerr << (unit/256) MOD 16 << (unit / 16) MOD 16 << unit MOD 16<< '
\\
n';
//std::cerr << (unit/256) MOD 16 << (unit / 16) MOD 16 << unit MOD 16<< '
\\
n';
//double t0 = time_time();
switch(unit)
switch(unit)
{
{
case 0x000: dgemm_(&N, &N, &Nz1, &Nz0, &Nx1, &a, y, &sy_0, x, &sx_0, &b, z, &sz_0); break;
case 0x000: dgemm_(&N, &N, &Nz1, &Nz0, &Nx1, &a, y, &sy_0, x, &sx_0, &b, z, &sz_0); break;
...
@@ -290,6 +299,7 @@ class GemmRelated(Op):
...
@@ -290,6 +299,7 @@ class GemmRelated(Op):
case 0x111: dgemm_(&N, &N, &Nz0, &Nz1, &Nx1, &a, x, &sx_1, y, &sy_1, &b, z, &sz_1); break;
case 0x111: dgemm_(&N, &N, &Nz0, &Nz1, &Nx1, &a, x, &sx_1, y, &sy_1, &b, z, &sz_1); break;
default: PyErr_SetString(PyExc_ValueError, "some matrix has no unit stride");
%(fail)
s;
default: PyErr_SetString(PyExc_ValueError, "some matrix has no unit stride");
%(fail)
s;
};
};
//fprintf(stderr, "Calling dgemm
%%
i
%%
i
%%
i
%%
i took
%%
f
\\
n", unit, Nz1, Nz0, Nx1, time_time()- t0);
"""
"""
end_switch_typenum
=
"""
end_switch_typenum
=
"""
...
@@ -319,7 +329,7 @@ class GemmRelated(Op):
...
@@ -319,7 +329,7 @@ class GemmRelated(Op):
self
.
end_switch_typenum
),
''
)
self
.
end_switch_typenum
),
''
)
def
build_gemm_version
(
self
):
def
build_gemm_version
(
self
):
return
(
2
,)
return
(
4
,)
class
Gemm
(
GemmRelated
):
class
Gemm
(
GemmRelated
):
"""In-place version of matrix-matrix multiplication (with accumulation):
"""In-place version of matrix-matrix multiplication (with accumulation):
...
@@ -442,6 +452,7 @@ class Gemm(GemmRelated):
...
@@ -442,6 +452,7 @@ class Gemm(GemmRelated):
dims[0] =
%(_z)
s->dimensions[0];
dims[0] =
%(_z)
s->dimensions[0];
dims[1] =
%(_z)
s->dimensions[1];
dims[1] =
%(_z)
s->dimensions[1];
%(_zout)
s = (PyArrayObject*)PyArray_SimpleNew(2, dims, type_num_
%(_z)
s);
%(_zout)
s = (PyArrayObject*)PyArray_SimpleNew(2, dims, type_num_
%(_z)
s);
//fprintf(stderr, "Gemm Allocating
%%
i
%%
i
\\
n", dims[0], dims[1]);
if(!
%(_zout)
s) {
if(!
%(_zout)
s) {
PyErr_SetString(PyExc_MemoryError, "failed to alloc gemm_no_inplace output");
PyErr_SetString(PyExc_MemoryError, "failed to alloc gemm_no_inplace output");
%(fail)
s
%(fail)
s
...
@@ -515,7 +526,11 @@ class Gemm(GemmRelated):
...
@@ -515,7 +526,11 @@ class Gemm(GemmRelated):
return
full_code
return
full_code
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
3
,)
+
self
.
build_gemm_version
()
gv
=
self
.
build_gemm_version
()
if
gv
:
return
(
3
,)
+
gv
else
:
return
gv
gemm_inplace
=
Gemm
(
inplace
=
True
)
gemm_inplace
=
Gemm
(
inplace
=
True
)
gemm_no_inplace
=
Gemm
(
inplace
=
False
)
gemm_no_inplace
=
Gemm
(
inplace
=
False
)
...
@@ -817,6 +832,7 @@ class Dot22(GemmRelated):
...
@@ -817,6 +832,7 @@ class Dot22(GemmRelated):
dims[0] =
%(_x)
s->dimensions[0];
dims[0] =
%(_x)
s->dimensions[0];
dims[1] =
%(_y)
s->dimensions[1];
dims[1] =
%(_y)
s->dimensions[1];
%(_zout)
s = (PyArrayObject*)PyArray_SimpleNew(2, dims, type_num_
%(_x)
s);
%(_zout)
s = (PyArrayObject*)PyArray_SimpleNew(2, dims, type_num_
%(_x)
s);
//fprintf(stderr, "Dot Allocating
%%
i
%%
i
\\
n", dims[0], dims[1]);
if(!
%(_zout)
s) {
if(!
%(_zout)
s) {
PyErr_SetString(PyExc_MemoryError, "failed to alloc dot22 output");
PyErr_SetString(PyExc_MemoryError, "failed to alloc dot22 output");
%(fail)
s
%(fail)
s
...
@@ -841,7 +857,11 @@ class Dot22(GemmRelated):
...
@@ -841,7 +857,11 @@ class Dot22(GemmRelated):
full_code
=
self
.
build_gemm_call
()
%
dict
(
locals
(),
**
sub
)
full_code
=
self
.
build_gemm_call
()
%
dict
(
locals
(),
**
sub
)
return
full_code
return
full_code
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
1
,)
+
self
.
build_gemm_version
()
gv
=
self
.
build_gemm_version
()
if
gv
:
return
(
1
,)
+
gv
else
:
return
gv
_dot22
=
Dot22
()
_dot22
=
Dot22
()
...
@@ -947,7 +967,11 @@ class Dot22Scalar(GemmRelated):
...
@@ -947,7 +967,11 @@ class Dot22Scalar(GemmRelated):
full_code
=
self
.
build_gemm_call
()
%
dict
(
locals
(),
**
sub
)
full_code
=
self
.
build_gemm_call
()
%
dict
(
locals
(),
**
sub
)
return
full_code
return
full_code
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
2
,)
+
self
.
build_gemm_version
()
gv
=
self
.
build_gemm_version
()
if
gv
:
return
(
2
,)
+
gv
else
:
return
gv
_dot22scalar
=
Dot22Scalar
()
_dot22scalar
=
Dot22Scalar
()
...
...
theano/tensor/nnet/conv.py
浏览文件 @
be0902fb
"""
"""
Contains an
o
p for convolving input images with a set of filters. This was
Contains an
O
p for convolving input images with a set of filters. This was
developed especially for Convolutional Neural Networks.
developed especially for Convolutional Neural Networks.
For related ops, including downsampling and subsampling, see
tensor.signal and tensor.signal.downsample.
See especially conv2d().
"""
"""
__docformat__
=
"restructuredtext en"
__docformat__
=
"restructuredtext en"
...
@@ -764,7 +769,6 @@ using namespace std;
...
@@ -764,7 +769,6 @@ using namespace std;
d
[
"self_dx"
]
=
self
.
dx
d
[
"self_dx"
]
=
self
.
dx
d
[
"self_dy"
]
=
self
.
dy
d
[
"self_dy"
]
=
self
.
dy
d
[
"mode"
]
=
self
.
out_mode
.
upper
()
d
[
"mode"
]
=
self
.
out_mode
.
upper
()
d
[
"mode"
]
=
self
.
out_mode
.
upper
()
d
[
"affectation"
]
=
"="
d
[
"affectation"
]
=
"="
if
all_shape
:
if
all_shape
:
d
[
"self_bsize"
]
=
self
.
bsize
d
[
"self_bsize"
]
=
self
.
bsize
...
@@ -910,7 +914,7 @@ if(%(filtersflipped)s->nd==3){
...
@@ -910,7 +914,7 @@ if(%(filtersflipped)s->nd==3){
kerns_dim[1]=
%(filtersflipped)
s->dimensions[1];
kerns_dim[1]=
%(filtersflipped)
s->dimensions[1];
kerns_dim[0]=
%(filtersflipped)
s->dimensions[0];
kerns_dim[0]=
%(filtersflipped)
s->dimensions[0];
}else{
}else{
std:stringstream temp;
std:
:
stringstream temp;
temp << "nddim="<<
%(filtersflipped)
s->nd;
temp << "nddim="<<
%(filtersflipped)
s->nd;
std::string param = temp.str();
std::string param = temp.str();
PyErr_SetString(PyExc_ValueError,
PyErr_SetString(PyExc_ValueError,
...
@@ -1145,7 +1149,7 @@ if(%(filtersflipped)s->nd==3){
...
@@ -1145,7 +1149,7 @@ if(%(filtersflipped)s->nd==3){
kerns_dim[1]=
%(filtersflipped)
s->dimensions[1];
kerns_dim[1]=
%(filtersflipped)
s->dimensions[1];
kerns_dim[0]=
%(filtersflipped)
s->dimensions[0];
kerns_dim[0]=
%(filtersflipped)
s->dimensions[0];
}else{
}else{
std:stringstream temp;
std:
:
stringstream temp;
temp << "nddim="<<
%(filtersflipped)
s->nd;
temp << "nddim="<<
%(filtersflipped)
s->nd;
std::string param = temp.str();
std::string param = temp.str();
PyErr_SetString(PyExc_ValueError,
PyErr_SetString(PyExc_ValueError,
...
@@ -1377,7 +1381,7 @@ if(%(img2d)s->nd==2){
...
@@ -1377,7 +1381,7 @@ if(%(img2d)s->nd==2){
img2d_dim[1]=
%(img2d)
s->dimensions[1];
img2d_dim[1]=
%(img2d)
s->dimensions[1];
img2d_dim[0]=
%(img2d)
s->dimensions[0];
img2d_dim[0]=
%(img2d)
s->dimensions[0];
}else {
}else {
std:stringstream temp;
std:
:
stringstream temp;
temp << "nddim="<<
%(img2d)
s->nd;
temp << "nddim="<<
%(img2d)
s->nd;
std::string param = temp.str();
std::string param = temp.str();
PyErr_SetString(PyExc_ValueError,
PyErr_SetString(PyExc_ValueError,
...
...
theano/tensor/nnet/tests/speed_test_conv.py
浏览文件 @
be0902fb
...
@@ -143,16 +143,18 @@ def speed_multilayer_conv():
...
@@ -143,16 +143,18 @@ def speed_multilayer_conv():
validate
=
False
# we don't validate the result to have it much faster!
validate
=
False
# we don't validate the result to have it much faster!
verbose
=
1
verbose
=
1
unroll_batch
=
[
1
,
2
,
4
,
5
,
10
,
20
]
unroll_batch
=
[
1
,
2
,
3
,
4
,
5
,
10
]
#15, 30, 60 always much slower
unroll_kern
=
[
1
,
2
,
4
,
5
,
10
,
20
]
unroll_kern
=
[
1
,
2
,
3
,
4
,
5
,
10
]
#15, 30, 60 always much slower
unroll_batch
=
[
1
,
4
,
5
]
#unroll_batch = [1,4,5]
unroll_kern
=
[
1
,
4
,
5
]
#unroll_kern = [1,4,5]
#unroll_batch = [1,4]
#unroll_kern = [1,4]
unroll_patch
=
[
True
,
False
]
unroll_patch
=
[
True
,
False
]
bsize
=
2
0
# batch size
bsize
=
6
0
# batch size
imshp_start
=
(
1
,
48
,
48
)
#un square shape to test more corner case.
imshp_start
=
(
1
,
48
,
48
)
#un square shape to test more corner case.
kshps
=
([
11
,
12
],[
12
,
11
])
#un square shape to test more corner case.
kshps
=
([
11
,
12
],[
12
,
11
])
#un square shape to test more corner case.
nkerns
=
[
20
,
2
0
]
# per output pixel
nkerns
=
[
60
,
6
0
]
# per output pixel
ssizes
=
[(
1
,
1
),]
#(1,1)]#(2,2) bugged
ssizes
=
[(
1
,
1
),]
#(1,1)]#(2,2) bugged
convmodes
=
[
'valid'
,
'full'
]
convmodes
=
[
'valid'
,
'full'
]
do_convolve2
=
False
do_convolve2
=
False
...
@@ -212,8 +214,10 @@ def speed_multilayer_conv():
...
@@ -212,8 +214,10 @@ def speed_multilayer_conv():
best
=
N
.
asarray
(
best
)
best
=
N
.
asarray
(
best
)
worst
=
N
.
asarray
(
worst
)
worst
=
N
.
asarray
(
worst
)
print
"timing for unrolled version"
print
"timing for unrolled version"
print
t_b_k
print
"unroll_batch/unroll_kern valid_mode full_mode"
print
t
for
n_b
in
range
(
len
(
unroll_batch
)):
for
n_k
in
range
(
len
(
unroll_kern
)):
print
unroll_batch
[
n_b
],
"/"
,
unroll_kern
[
n_k
],
" "
,
t
[
n_b
,
n_k
]
t_detail
=
t
t_detail
=
t
t
=
t
.
sum
(
axis
=
2
)
t
=
t
.
sum
(
axis
=
2
)
print
"max
%.3
fs"
%
t
.
max
(),
"max param(batch unloop size/kernel unloop size)"
,
t_b_k
[
t
.
argmax
()]
print
"max
%.3
fs"
%
t
.
max
(),
"max param(batch unloop size/kernel unloop size)"
,
t_b_k
[
t
.
argmax
()]
...
...
theano/tensor/nnet/tests/test_conv.py
浏览文件 @
be0902fb
...
@@ -88,10 +88,10 @@ class TestConv2D(unittest.TestCase):
...
@@ -88,10 +88,10 @@ class TestConv2D(unittest.TestCase):
Tests that basic convolutions work for odd and even dimensions of image and filter
Tests that basic convolutions work for odd and even dimensions of image and filter
shapes, as well as rectangular images and filters.
shapes, as well as rectangular images and filters.
"""
"""
self
.
validate
((
3
,
2
,
8
,
8
),
(
4
,
2
,
5
,
5
),
'valid'
)
self
.
validate
((
3
,
2
,
8
,
8
),
(
4
,
2
,
5
,
5
),
'valid'
,
verify_grad
=
False
)
self
.
validate
((
3
,
2
,
7
,
5
),
(
5
,
2
,
2
,
3
),
'valid'
)
self
.
validate
((
3
,
2
,
7
,
5
),
(
5
,
2
,
2
,
3
),
'valid'
)
self
.
validate
((
3
,
2
,
7
,
5
),
(
5
,
2
,
3
,
2
),
'valid'
)
self
.
validate
((
3
,
2
,
7
,
5
),
(
5
,
2
,
3
,
2
),
'valid'
,
verify_grad
=
False
)
self
.
validate
((
3
,
2
,
8
,
8
),
(
4
,
2
,
5
,
5
),
'full'
)
self
.
validate
((
3
,
2
,
8
,
8
),
(
4
,
2
,
5
,
5
),
'full'
,
verify_grad
=
False
)
self
.
validate
((
3
,
2
,
7
,
5
),
(
5
,
2
,
2
,
3
),
'full'
)
self
.
validate
((
3
,
2
,
7
,
5
),
(
5
,
2
,
2
,
3
),
'full'
)
# test filter same size as input
# test filter same size as input
...
@@ -105,7 +105,7 @@ class TestConv2D(unittest.TestCase):
...
@@ -105,7 +105,7 @@ class TestConv2D(unittest.TestCase):
"""
"""
self
.
validate
((
3
,
2
,
7
,
5
),
(
5
,
2
,
2
,
3
),
'valid'
,
unroll_patch
=
False
)
self
.
validate
((
3
,
2
,
7
,
5
),
(
5
,
2
,
2
,
3
),
'valid'
,
unroll_patch
=
False
)
self
.
validate
((
3
,
2
,
7
,
5
),
(
5
,
2
,
2
,
3
),
'full'
,
unroll_patch
=
False
)
self
.
validate
((
3
,
2
,
7
,
5
),
(
5
,
2
,
2
,
3
),
'full'
,
unroll_patch
=
False
)
self
.
validate
((
3
,
2
,
3
,
3
),
(
4
,
2
,
3
,
3
),
'valid'
,
unroll_patch
=
False
)
self
.
validate
((
3
,
2
,
3
,
3
),
(
4
,
2
,
3
,
3
),
'valid'
,
unroll_patch
=
False
,
verify_grad
=
False
)
def
test_unroll_special
(
self
):
def
test_unroll_special
(
self
):
"""
"""
...
@@ -175,7 +175,17 @@ class TestConv2D(unittest.TestCase):
...
@@ -175,7 +175,17 @@ class TestConv2D(unittest.TestCase):
"""
"""
try
:
try
:
self
.
validate
((
3
,
2
,
8
,
8
),
(
4
,
2
,
5
,
5
),
'valid'
,
input
=
T
.
dmatrix
())
self
.
validate
((
3
,
2
,
8
,
8
),
(
4
,
2
,
5
,
5
),
'valid'
,
input
=
T
.
dmatrix
())
# should never reach here
self
.
fail
()
except
:
pass
try
:
self
.
validate
((
3
,
2
,
8
,
8
),
(
4
,
2
,
5
,
5
),
'valid'
,
filters
=
T
.
dvector
())
self
.
validate
((
3
,
2
,
8
,
8
),
(
4
,
2
,
5
,
5
),
'valid'
,
filters
=
T
.
dvector
())
# should never reach here
self
.
fail
()
except
:
pass
try
:
self
.
validate
((
3
,
2
,
8
,
8
),
(
4
,
2
,
5
,
5
),
'valid'
,
input
=
T
.
dtensor3
())
self
.
validate
((
3
,
2
,
8
,
8
),
(
4
,
2
,
5
,
5
),
'valid'
,
input
=
T
.
dtensor3
())
# should never reach here
# should never reach here
self
.
fail
()
self
.
fail
()
...
...
theano/tensor/opt.py
浏览文件 @
be0902fb
...
@@ -224,7 +224,12 @@ class MakeVector(T.Op):
...
@@ -224,7 +224,12 @@ class MakeVector(T.Op):
def
__str__
(
self
):
def
__str__
(
self
):
return
self
.
__class__
.
__name__
return
self
.
__class__
.
__name__
def
perform
(
self
,
node
,
inputs
,
(
out
,)):
def
perform
(
self
,
node
,
inputs
,
(
out
,)):
out
[
0
]
=
theano
.
_asarray
(
inputs
,
dtype
=
node
.
outputs
[
0
]
.
dtype
)
# not calling theano._asarray as optimization
if
out
[
0
]
is
None
:
out
[
0
]
=
theano
.
_asarray
(
inputs
,
dtype
=
node
.
outputs
[
0
]
.
dtype
)
else
:
# assume that out has correct dtype. there is no cheap way to check
out
[
0
][
...
]
=
inputs
make_vector
=
MakeVector
()
make_vector
=
MakeVector
()
...
@@ -262,7 +267,10 @@ class Shape_i(T.Op):
...
@@ -262,7 +267,10 @@ class Shape_i(T.Op):
raise
TypeError
(
'x has too few dimensions for Shape_i'
,
(
x
,
self
.
i
))
raise
TypeError
(
'x has too few dimensions for Shape_i'
,
(
x
,
self
.
i
))
return
T
.
Apply
(
self
,
[
x
],
[
T
.
lscalar
()])
return
T
.
Apply
(
self
,
[
x
],
[
T
.
lscalar
()])
def
perform
(
self
,
node
,
(
x
,
),
(
out
,
)):
def
perform
(
self
,
node
,
(
x
,
),
(
out
,
)):
out
[
0
]
=
theano
.
_asarray
(
x
.
shape
[
self
.
i
],
dtype
=
'int64'
)
if
out
[
0
]
is
None
:
out
[
0
]
=
theano
.
_asarray
(
x
.
shape
[
self
.
i
],
dtype
=
'int64'
)
else
:
out
[
0
][
...
]
=
x
.
shape
[
self
.
i
]
def
grad
(
self
,
(
x
,),
(
gz
,)):
def
grad
(
self
,
(
x
,),
(
gz
,)):
return
[
None
]
return
[
None
]
...
...
theano/tensor/tests/test_blas.py
浏览文件 @
be0902fb
...
@@ -603,8 +603,12 @@ def test_dot22scalar():
...
@@ -603,8 +603,12 @@ def test_dot22scalar():
#currently the canonizer don't always merge all Mul together...
#currently the canonizer don't always merge all Mul together...
#that force the optimizer to make a recursive search witch it don't do now.
#that force the optimizer to make a recursive search witch it don't do now.
#but it do it for 1 level of recursion.
#but it do it for 1 level of recursion.
# assert _dot22scalar in [x.op for x in topo]
# assert _dot22scalar in [x.op for x in topo]
# assert len(topo)==2
# assert len(topo)==2
### Fred,
### What are you talking about?
### -James (March 28 2010)
f
(
av
,
bv
,
cv
)
f
(
av
,
bv
,
cv
)
f
=
theano
.
function
([
a
,
b
,
c
],
c
*
a
*
0.2
*
T
.
dot
(
a
,
b
),
mode
=
m2
)
f
=
theano
.
function
([
a
,
b
,
c
],
c
*
a
*
0.2
*
T
.
dot
(
a
,
b
),
mode
=
m2
)
topo
=
f
.
maker
.
env
.
toposort
()
topo
=
f
.
maker
.
env
.
toposort
()
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论