Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
3dc94e90
提交
3dc94e90
authored
2月 06, 2013
作者:
abalkin
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'master' into take-op-c-code-clean
上级
fa5e7d12
1dcc6180
隐藏空白字符变更
内嵌
并排
正在显示
19 个修改的文件
包含
188 行增加
和
92 行删除
+188
-92
theano-nose
bin/theano-nose
+1
-0
debugmode.py
theano/compile/debugmode.py
+13
-6
test_debugmode.py
theano/compile/tests/test_debugmode.py
+34
-2
configdefaults.py
theano/configdefaults.py
+1
-1
cmodule.py
theano/gof/cmodule.py
+13
-5
basic_ops.py
theano/sandbox/cuda/basic_ops.py
+12
-4
blas.py
theano/sandbox/cuda/blas.py
+24
-17
cuda_ndarray.cu
theano/sandbox/cuda/cuda_ndarray.cu
+15
-8
cuda_ndarray.cuh
theano/sandbox/cuda/cuda_ndarray.cuh
+8
-6
type.py
theano/sandbox/cuda/type.py
+2
-2
multinomial.py
theano/sandbox/multinomial.py
+1
-1
test_rng_mrg.py
theano/sandbox/test_rng_mrg.py
+1
-1
test_scan.py
theano/scan_module/tests/test_scan.py
+2
-2
blas.py
theano/tensor/blas.py
+1
-1
conv.py
theano/tensor/nnet/conv.py
+36
-23
opt.py
theano/tensor/opt.py
+3
-0
test_basic.py
theano/tensor/tests/test_basic.py
+12
-11
test_blas.py
theano/tensor/tests/test_blas.py
+8
-1
run_tests_in_batch.py
theano/tests/run_tests_in_batch.py
+1
-1
没有找到文件。
bin/theano-nose
浏览文件 @
3dc94e90
...
...
@@ -87,6 +87,7 @@ def main():
if
time_prof_args
or
batch_args
:
from
theano.tests
import
run_tests_in_batch
return
run_tests_in_batch
.
main
(
theano_nose
=
os
.
path
.
realpath
(
__file__
),
batch_size
=
batch_size
,
time_profile
=
bool
(
time_prof_args
),
display_batch_output
=
display_batch_output
)
...
...
theano/compile/debugmode.py
浏览文件 @
3dc94e90
...
...
@@ -1609,7 +1609,11 @@ class _Linker(gof.link.LocalLinker):
active_order
=
self
.
schedule
(
fgraph
)
# an ordering of just the active nodes
active_order_set
=
set
(
active_order
)
no_recycling
=
self
.
no_recycling
# Disable no_recycling, in order to be able to use
# check_preallocated_output even on the output of the function.
# no_recycling in individual thunks does not really matter, since
# the function's outputs will always be freshly allocated.
no_recycling
=
[]
input_storage
,
output_storage
,
storage_map
=
link
.
map_storage
(
fgraph
,
order
,
input_storage_
,
output_storage_
)
...
...
@@ -1704,11 +1708,14 @@ class _Linker(gof.link.LocalLinker):
_logger
.
warn
(
"We won't check the perform function of node '
%
s' but we will check its make_thunk function"
%
node
)
thunks_py
[
-
1
]
=
thunk
if
no_recycling
is
True
:
no_recycling
=
storage_map
.
values
()
no_recycling
=
utils
.
difference
(
no_recycling
,
input_storage
)
# Use self.no_recycling (that was passed in accept()) to always
# use new memory storage when it is needed, in particular for the
# function's outputs. no_recycling_map will be used in f() below.
if
self
.
no_recycling
is
True
:
no_recycling_map
=
storage_map
.
values
()
no_recycling_map
=
utils
.
difference
(
no_recycling_map
,
input_storage
)
else
:
no_recycling
=
[
storage_map
[
r
]
for
r
in
no_recycling
no_recycling
_map
=
[
storage_map
[
r
]
for
r
in
self
.
no_recycling
if
r
not
in
fgraph
.
inputs
]
# Precompute some things for storage pre-allocation
...
...
@@ -1729,7 +1736,7 @@ class _Linker(gof.link.LocalLinker):
_logger
.
debug
(
"starting a DebugMode call"
)
_logger
.
debug
(
"self.maker.mode.check_preallocated_output:
%
s"
,
self
.
maker
.
mode
.
check_preallocated_output
)
for
x
in
no_recycling
:
for
x
in
no_recycling
_map
:
x
[
0
]
=
None
# nest all this in try-finally to put storage *back* into
...
...
theano/compile/tests/test_debugmode.py
浏览文件 @
3dc94e90
...
...
@@ -709,7 +709,7 @@ class Test_preallocated_output(unittest.TestCase):
a
=
theano
.
tensor
.
fmatrix
(
'a'
)
b
=
theano
.
tensor
.
fmatrix
(
'b'
)
z
=
BrokenCImplementationAdd
()(
a
,
b
)
#
Needed so that z is not the output of the graph
#
In this test, we do not want z to be an output of the graph.
out
=
theano
.
tensor
.
dot
(
z
,
numpy
.
eye
(
7
))
a_val
=
self
.
rng
.
randn
(
7
,
7
)
.
astype
(
'float32'
)
...
...
@@ -730,7 +730,39 @@ class Test_preallocated_output(unittest.TestCase):
check_preallocated_output
=
[
'f_contiguous'
])
f
=
theano
.
function
([
a
,
b
],
out
,
mode
=
mode
)
if
theano
.
config
.
cxx
:
self
.
assertRaises
(
debugmode
.
BadThunkOutput
,
f
,
a_val
,
b_val
)
else
:
# The python code of this op is good.
f
(
a_val
,
b_val
)
def
test_f_contiguous_out
(
self
):
# Same test as test_f_contiguous, but check that it works
# even if z _is_ the output of the graph
a
=
theano
.
tensor
.
fmatrix
(
'a'
)
b
=
theano
.
tensor
.
fmatrix
(
'b'
)
out
=
BrokenCImplementationAdd
()(
a
,
b
)
a_val
=
self
.
rng
.
randn
(
7
,
7
)
.
astype
(
'float32'
)
b_val
=
self
.
rng
.
randn
(
7
,
7
)
.
astype
(
'float32'
)
# Should work
mode
=
debugmode
.
DebugMode
(
check_preallocated_output
=
[
'c_contiguous'
])
f
=
theano
.
function
([
a
,
b
],
out
,
mode
=
mode
)
out_val
=
f
(
a_val
,
b_val
)
#print 'out_val =', out_val
#print out_val.strides
# Should raise an Exception, since the output buffer is
# used incorrectly.
mode
=
debugmode
.
DebugMode
(
check_preallocated_output
=
[
'f_contiguous'
])
f
=
theano
.
function
([
a
,
b
],
out
,
mode
=
mode
)
if
theano
.
config
.
cxx
:
self
.
assertRaises
(
debugmode
.
BadThunkOutput
,
f
,
a_val
,
b_val
)
else
:
...
...
theano/configdefaults.py
浏览文件 @
3dc94e90
...
...
@@ -298,7 +298,7 @@ AddConfigVar('warn.ignore_bug_before',
"bugs found after that version. "
"Warning for specific bugs can be configured with specific "
"[warn] flags."
),
EnumStr
(
'
None'
,
'all'
,
'0.3'
,
'0.4'
,
'0.4.1'
,
'0.5
'
,
'0.6'
,
EnumStr
(
'
0.5'
,
'None'
,
'all'
,
'0.3'
,
'0.4'
,
'0.4.1
'
,
'0.6'
,
allow_override
=
False
),
in_c_key
=
False
)
...
...
theano/gof/cmodule.py
浏览文件 @
3dc94e90
...
...
@@ -891,10 +891,11 @@ class ModuleCache(object):
hash_key
=
hash
(
key
)
key_data
=
None
# We have never seen this key before.
# Acquire lock before creating things in the compile cache,
# to avoid that other processes remove the compile dir while it
# is still empty.
compilelock
.
get_lock
()
# We acquire the lock later only if we where able to
# generate c code Otherwise, we would take the lock for op
# that have only a perform().
lock_taken
=
False
# This try/finally block ensures that the lock is released once we
# are done writing in the cache file or after raising an exception.
try
:
...
...
@@ -918,6 +919,13 @@ class ModuleCache(object):
# The first compilation step is to yield the source code.
src_code
=
compile_steps
.
next
()
module_hash
=
get_module_hash
(
src_code
,
key
)
# The op have c_code, so take the lock.
compilelock
.
get_lock
()
lock_taken
=
True
assert
os
.
path
.
exists
(
location
),
(
"The directory just created shouldn't be deleted!"
)
if
module_hash
in
self
.
module_hash_to_key_data
:
_logger
.
debug
(
"Duplicated module! Will re-use the "
"previous one"
)
...
...
@@ -1039,7 +1047,7 @@ class ModuleCache(object):
finally
:
# Release lock if needed.
if
not
keep_lock
:
if
not
keep_lock
and
lock_taken
:
compilelock
.
release_lock
()
# Update map from key to module name for all keys associated to
...
...
theano/sandbox/cuda/basic_ops.py
浏览文件 @
3dc94e90
...
...
@@ -2737,7 +2737,7 @@ class GpuAlloc(GpuOp):
%(fail)
s;
}
}
if (
%(memset_0)
s)
if (
%(memset_0)
s
&& CudaNdarray_is_c_contiguous(
%(out)
s)
)
{
if (cudaSuccess != cudaMemset(
%(out)
s->devdata, 0,
CudaNdarray_SIZE(
%(out)
s) * 4))
...
...
@@ -2769,7 +2769,7 @@ class GpuAlloc(GpuOp):
return
[
None
for
i
in
inputs
]
def
c_code_cache_version
(
self
):
return
(
5
,)
return
(
7
,)
def
do_constant_folding
(
self
,
node
):
for
client
in
node
.
outputs
[
0
]
.
clients
:
...
...
@@ -2803,6 +2803,13 @@ class GpuContiguous(GpuOp):
def
__hash__
(
self
):
return
hash
(
type
(
self
))
def
grad
(
self
,
inputs
,
dout
):
x
,
=
inputs
dout
,
=
dout
return
[
dout
]
def
__str__
(
self
):
return
self
.
__class__
.
__name__
...
...
@@ -2824,7 +2831,8 @@ class GpuContiguous(GpuOp):
} else if ((NULL ==
%(z)
s)"""
%
locals
()
for
i
in
xrange
(
len
(
node
.
inputs
[
0
]
.
type
.
broadcastable
)):
str
+=
"
\n
|| (CudaNdarray_HOST_DIMS(
%(input)
s)[
%(i)
s] != CudaNdarray_HOST_DIMS(
%(z)
s)[
%(i)
s])"
%
locals
()
str
+=
""")
str
+=
"""
|| !CudaNdarray_is_c_contiguous(
%(z)
s))
{
Py_XDECREF(
%(z)
s);
%(z)
s = (CudaNdarray*)CudaNdarray_Copy(
%(input)
s);
...
...
@@ -2840,7 +2848,7 @@ class GpuContiguous(GpuOp):
return
str
def
c_code_cache_version
(
self
):
return
(
1
,)
return
(
2
,)
gpu_contiguous
=
GpuContiguous
()
...
...
theano/sandbox/cuda/blas.py
浏览文件 @
3dc94e90
...
...
@@ -748,7 +748,7 @@ class GpuDownsampleFactorMax(GpuOp):
#def perform(self, node, input_storage, output_storage):
#raise NotImplementedError('only C is implemented')
def
c_code_cache_version
(
self
):
return
(
5
)
return
(
6
)
def
c_code
(
self
,
node
,
nodename
,
inp
,
out
,
sub
):
x
,
=
inp
...
...
@@ -849,6 +849,9 @@ class GpuDownsampleFactorMax(GpuOp):
float *z, int zS0, int zS1, int zS2, int zS3)
{
float cur_max, cur_x;
// Cast threadIdx.x into a signed int, to avoid problems with
// indexing with negative offsets.
int tx = threadIdx.x;
for(int block_x_idx = blockIdx.x;
block_x_idx < D0 * D1;
block_x_idx += gridDim.x){
...
...
@@ -865,7 +868,7 @@ class GpuDownsampleFactorMax(GpuOp):
{
__syncthreads();
// load the current row of the image into shared memory
for (int j = t
hreadIdx.
x; j < xD3; j += blockDim.x)
for (int j = tx; j < xD3; j += blockDim.x)
{
xbuf[j] = x[i0*xS0 + i1*xS1 + (i2*pf2+r2)*xS2 + j*xS3];
}
...
...
@@ -873,7 +876,7 @@ class GpuDownsampleFactorMax(GpuOp):
// initialize our max if this is the
// first row we're loading
cur_max = (r2 == 0) ? xbuf[t
hreadIdx.
x*pf3] : cur_max;
cur_max = (r2 == 0) ? xbuf[tx*pf3] : cur_max;
// do a mini-reduction over the pf3 relevant elements
// in the current row
...
...
@@ -882,7 +885,7 @@ class GpuDownsampleFactorMax(GpuOp):
{
for (int k = 0; k < pf3; ++k)
{
cur_x = xbuf[t
hreadIdx.
x*pf3+k];
cur_x = xbuf[tx*pf3+k];
cur_max = (cur_x > cur_max) ? cur_x : cur_max;
}
}
...
...
@@ -890,17 +893,16 @@ class GpuDownsampleFactorMax(GpuOp):
{
for (int k = 0; k < pf3; ++k)
{
if (t
hreadIdx.
x*pf3 + k < xD3)
if (tx*pf3 + k < xD3)
{
cur_x = xbuf[t
hreadIdx.
x*pf3+k];
cur_x = xbuf[tx*pf3+k];
cur_max = (cur_x > cur_max) ? cur_x : cur_max;
}
}
}
}
//store the result to global memory
z[i0*zS0 + i1*zS1 + i2*zS2 + threadIdx.x*zS3] = cur_max;
z[i0*zS0 + i1*zS1 + i2*zS2 + tx*zS3] = cur_max;
}
}
"""
%
locals
()
...
...
@@ -931,7 +933,7 @@ class GpuDownsampleFactorMaxGrad(GpuOp):
return
Apply
(
self
,
[
x
,
z
,
gz
],
[
x
.
type
()])
def
c_code_cache_version
(
self
):
return
(
6
,)
return
(
7
,)
def
c_code
(
self
,
node
,
nodename
,
inp
,
out
,
sub
):
x
,
z
,
gz
=
inp
...
...
@@ -999,7 +1001,11 @@ class GpuDownsampleFactorMaxGrad(GpuOp):
CudaNdarray_HOST_STRIDES(
%(gz)
s)[1],
CudaNdarray_HOST_STRIDES(
%(gz)
s)[2],
CudaNdarray_HOST_STRIDES(
%(gz)
s)[3],
CudaNdarray_DEV_DATA(
%(gx)
s));
CudaNdarray_DEV_DATA(
%(gx)
s),
CudaNdarray_HOST_STRIDES(
%(gx)
s)[0],
CudaNdarray_HOST_STRIDES(
%(gx)
s)[1],
CudaNdarray_HOST_STRIDES(
%(gx)
s)[2],
CudaNdarray_HOST_STRIDES(
%(gx)
s)[3]);
CNDA_THREAD_SYNC;
cudaError_t err = cudaGetLastError();
if( cudaSuccess != err)
...
...
@@ -1037,7 +1043,7 @@ class GpuDownsampleFactorMaxGrad(GpuOp):
const float * x, int xS0, int xS1, int xS2, int xS3,
const float * z, int zS0, int zS1, int zS2, int zS3,
const float * gz, int gzS0, int gzS1, int gzS2, int gzS3,
float *gx)
float *gx
, int gxS0, int gxS1, int gxS2, int gxS3
)
{
// D0: number of image rows
// D1: number of image cols
...
...
@@ -1048,6 +1054,10 @@ class GpuDownsampleFactorMaxGrad(GpuOp):
// various .S. variables are strides
float cur_max, cur_x, my_z, my_gz;
// Cast threadIdx.x into a signed int, to avoid problems with
// indexing with negative offsets.
int tx = threadIdx.x;
for(int i0 = blockIdx.x;
i0 < D0;
i0 += gridDim.x){
...
...
@@ -1056,7 +1066,7 @@ class GpuDownsampleFactorMaxGrad(GpuOp):
// row wrt z and/or gz, ranges from 0 to D2 - 1 OR D2
// (as needed to cover all x rows)
int i2 = blockIdx.y;
int x_col = t
hreadIdx.x;
// col wrt x, ranges from 0 to xD3 - 1
int x_col = t
x;
// col wrt x, ranges from 0 to xD3 - 1
int z_col = x_col/ds1; // z_col corresponding to this x_col
...
...
@@ -1073,7 +1083,7 @@ class GpuDownsampleFactorMaxGrad(GpuOp):
if(blockDim.x != xD3)
{
x_col = t
hreadIdx.
x + col_iter * blockDim.x;
x_col = tx + col_iter * blockDim.x;
z_col = x_col/ds1;
}
...
...
@@ -1108,13 +1118,10 @@ class GpuDownsampleFactorMaxGrad(GpuOp):
// gx[image_row][image_col][x_row][x_col]
// = (my_z == x[image_row][image_col][
// x_row][x_col]) ? my_gz : 0.0f;
gx[i0 * D1*xD2*xD3 + i1*xD2*xD3 +
x_row*xD3 + x_col]
gx[i0*gxS0 + i1*gxS1 + x_row*gxS2 + x_col*gxS3]
= (my_z == x[i0*xS0 + i1*xS1 + x_row*xS2 +
x_col*xS3]) ? my_gz : 0.0f;
}
//gx[i0 * D1*xD2*xD3 + i1*xD2*xD3 +
// x_row*xD3 + x_col] = -999;
}
}
...
...
theano/sandbox/cuda/cuda_ndarray.cu
浏览文件 @
3dc94e90
...
...
@@ -3241,14 +3241,19 @@ static __global__ void k_copy_4d(const int N1,
// These must be made int instead of unsigned int due to a bug in nvcc
int
bx
=
blockIdx
.
x
;
int
by
=
blockIdx
.
y
;
// N1 and N2 are kept in case a future implementation needs to
// loop on the first two dimensions if there are not enough blocks
for
(
int
j
=
threadIdx
.
y
;
j
<
(
int
)
N4
;
j
+=
(
int
)
blockDim
.
y
)
for
(
int
i
=
bx
;
i
<
N1
;
i
+=
gridDim
.
x
)
{
for
(
int
i
=
threadIdx
.
x
;
i
<
N3
;
i
+=
(
int
)
blockDim
.
x
)
for
(
int
j
=
by
;
j
<
N2
;
j
+=
gridDim
.
y
)
{
y
[
bx
*
sy1
+
by
*
sy2
+
i
*
sy3
+
j
*
sy4
]
=
x
[
bx
*
sx1
+
by
*
sx2
+
i
*
sx3
+
j
*
sx4
];
for
(
int
k
=
threadIdx
.
x
;
k
<
N3
;
k
+=
(
int
)
blockDim
.
x
)
{
for
(
int
l
=
threadIdx
.
y
;
l
<
N4
;
l
+=
(
int
)
blockDim
.
y
)
{
y
[
i
*
sy1
+
j
*
sy2
+
k
*
sy3
+
l
*
sy4
]
=
x
[
i
*
sx1
+
j
*
sx2
+
k
*
sx3
+
l
*
sx4
];
}
}
}
}
}
...
...
@@ -3380,8 +3385,10 @@ int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self,
// The blocks implement the looping over the first two axes so
// this needs to be (N1, N2)
dim3
n_blocks
(
(
unsigned
int
)
CudaNdarray_HOST_DIMS
(
self
)[
0
],
(
unsigned
int
)
CudaNdarray_HOST_DIMS
(
self
)[
1
]);
dim3
n_blocks
(
std
::
min
(
CudaNdarray_HOST_DIMS
(
self
)[
0
],
NUM_VECTOR_OP_BLOCKS
),
std
::
min
(
CudaNdarray_HOST_DIMS
(
self
)[
1
],
NUM_VECTOR_OP_BLOCKS
));
// For the threads, just make as many as possible
dim3
n_threads
(
std
::
min
(
(
unsigned
int
)
CudaNdarray_HOST_DIMS
(
self
)[
2
],
(
unsigned
int
)
NUM_VECTOR_OP_THREADS_PER_BLOCK
),
...
...
theano/sandbox/cuda/cuda_ndarray.cuh
浏览文件 @
3dc94e90
...
...
@@ -12,8 +12,10 @@
#else
#define DllExport __declspec( dllimport )
#endif
#else
#define ALWAYS_INLINE
#else //else _WIN32
#define DllExport
#define ALWAYS_INLINE __attribute__((always_inline))
#endif
typedef
float
real
;
...
...
@@ -134,7 +136,7 @@ CudaNdarray_HOST_STRIDES(const CudaNdarray * self);
DllExport
const
int
*
CudaNdarray_HOST_LOG2DIMS
(
const
CudaNdarray
*
self
);
DllExport
inline
void
__attribute__
((
always_inline
))
DllExport
inline
void
ALWAYS_INLINE
cnda_mark_dev_structure_dirty
(
CudaNdarray
*
self
)
{
self
->
dev_structure_fresh
=
0
;
...
...
@@ -155,7 +157,7 @@ CudaNdarray_Equal(CudaNdarray *cnda1, CudaNdarray *cnda2);
*
* Does not sync structure to device.
*/
DllExport
inline
void
__attribute__
((
always_inline
))
DllExport
inline
void
ALWAYS_INLINE
CudaNdarray_set_dim
(
CudaNdarray
*
self
,
int
idx
,
int
d
)
{
if
((
idx
>=
self
->
nd
)
||
(
idx
<
0
)
||
(
d
<
0
))
...
...
@@ -173,7 +175,7 @@ CudaNdarray_set_dim(CudaNdarray * self, int idx, int d)
}
DllExport
inline
void
__attribute__
((
always_inline
))
DllExport
inline
void
ALWAYS_INLINE
CudaNdarray_set_stride
(
CudaNdarray
*
self
,
int
idx
,
int
s
)
{
if
((
idx
>=
self
->
nd
)
||
(
idx
<
0
))
...
...
@@ -232,7 +234,7 @@ DllExport PyObject * CudaNdarray_new_nd(const int nd);
* Note: This does not allocate storage for data, or free
* pre-existing storage.
*/
DllExport
inline
int
__attribute__
((
always_inline
))
DllExport
inline
int
ALWAYS_INLINE
CudaNdarray_set_nd
(
CudaNdarray
*
self
,
const
int
nd
)
{
if
(
nd
!=
self
->
nd
)
...
...
@@ -434,7 +436,7 @@ CudaNdarray_ZEROS(int n, int * dims);
/**
* True iff the strides look like [dim[nd-2], dim[nd-3], ... , dim[0], 1]
*/
DllExport
inline
bool
__attribute__
((
always_inline
))
DllExport
inline
bool
ALWAYS_INLINE
CudaNdarray_is_c_contiguous
(
const
CudaNdarray
*
self
)
{
bool
c_contiguous
=
true
;
...
...
theano/sandbox/cuda/type.py
浏览文件 @
3dc94e90
...
...
@@ -445,14 +445,14 @@ theano.compile.register_deep_copy_op_c_code(
%(fail)
s;
}
} else {
if(
!
CudaNdarray_CopyFromCudaNdarray(
%(oname)
s,
%(iname)
s)) {
if(CudaNdarray_CopyFromCudaNdarray(
%(oname)
s,
%(iname)
s)) {
PyErr_SetString(PyExc_ValueError,
"DeepCopyOp: the copy failed into already allocated space!");
%(fail)
s;
}
}
"""
,
version
=
2
)
version
=
3
)
# THIS WORKS But CudaNdarray instances don't compare equal to one
...
...
theano/sandbox/multinomial.py
浏览文件 @
3dc94e90
...
...
@@ -128,7 +128,7 @@ class MultinomialFromUniform(Op):
if
unis
.
shape
[
0
]
!=
pvals
.
shape
[
0
]:
raise
ValueError
(
"unis.shape[0] != pvals.shape[0]"
,
unis
.
shape
[
0
],
pvals
.
shape
[
0
])
if
not
z
[
0
]
or
z
[
0
]
.
shape
!=
pvals
.
shape
:
if
z
[
0
]
is
None
or
z
[
0
]
.
shape
!=
pvals
.
shape
:
z
[
0
]
=
numpy
.
zeros
(
pvals
.
shape
,
dtype
=
node
.
outputs
[
0
]
.
dtype
)
nb_multi
=
pvals
.
shape
[
0
]
...
...
theano/sandbox/test_rng_mrg.py
浏览文件 @
3dc94e90
...
...
@@ -696,7 +696,7 @@ def test_random_state_transfer():
"""
Test that random state can be transferred from one theano graph to another.
"""
class
Graph
()
:
class
Graph
:
def
__init__
(
self
,
seed
=
123
):
self
.
rng
=
MRG_RandomStreams
(
seed
)
self
.
y
=
self
.
rng
.
uniform
(
size
=
(
1
,))
...
...
theano/scan_module/tests/test_scan.py
浏览文件 @
3dc94e90
...
...
@@ -3331,8 +3331,8 @@ class T_Scan(unittest.TestCase):
outputs_info
=
[
tensor
.
zeros_like
(
A
)])
f
=
theano
.
function
([
A
,
B
],
S
.
owner
.
inputs
[
0
][
-
1
])
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
vA
=
rng
.
uniform
(
size
=
(
5
,
5
)
)
vB
=
rng
.
uniform
(
size
=
(
5
,
5
)
)
vA
=
rng
.
uniform
(
size
=
(
5
,
5
))
.
astype
(
theano
.
config
.
floatX
)
vB
=
rng
.
uniform
(
size
=
(
5
,
5
))
.
astype
(
theano
.
config
.
floatX
)
assert
numpy
.
allclose
(
f
(
vA
,
vB
),
numpy
.
dot
(
vA
.
T
,
vB
))
...
...
theano/tensor/blas.py
浏览文件 @
3dc94e90
...
...
@@ -1615,7 +1615,7 @@ def local_gemm_to_ger(node):
yv
=
y
.
dimshuffle
(
1
)
try
:
bval
=
T
.
get_scalar_constant_value
(
b
)
except
T
ype
Error
:
except
T
.
NotScalarConstant
Error
:
# b isn't a constant, GEMM is doing useful pre-scaling
return
...
...
theano/tensor/nnet/conv.py
浏览文件 @
3dc94e90
...
...
@@ -965,7 +965,7 @@ class ConvOp(OpenMPOp):
return
[
'<numpy/noprefix.h>'
,
'<iostream>'
,
'<sstream>'
]
def
c_code_cache_version
(
self
):
return
(
9
,
self
.
openmp
)
return
(
10
,
self
.
openmp
)
def
c_support_code
(
self
):
return
"""
...
...
@@ -1343,14 +1343,24 @@ if (typenum != typenum_f) {
%(fail)
s;
}
if (!img2d)
%(fail)
s;
if (!filtersflipped)
%(fail)
s;
if (!img2d)
{
PyErr_SetString(PyExc_AssertionError, "!img2d");
%(fail)
s;
}
if (!filtersflipped)
{
PyErr_SetString(PyExc_AssertionError, "!filtersflipped");
%(fail)
s;
}
if ((!
%(z)
s)
|| *PyArray_DIMS(
%(z)
s)!=4
||(PyArray_DIMS(
%(z)
s)[0] !=
%(self_bsize)
s)
||(PyArray_DIMS(
%(z)
s)[1] !=
%(self_nkern)
s)
||(PyArray_DIMS(
%(z)
s)[2] != dim_zz[0])
|| (PyArray_DIMS(
%(z)
s)[3] != dim_zz[1])
||(PyArray_DIMS(
%(z)
s)[3] != dim_zz[1])
||!PyArray_ISCONTIGUOUS(
%(z)
s)
)
{
{Py_XDECREF(
%(z)
s);}
...
...
@@ -1370,19 +1380,11 @@ Os[0]=%(self_outshp0)s;
Os[1]=
%(self_outshp1)
s;
//assertions
if (PyArray_STRIDES(
%(z)
s)[0] != PyArray_DIMS(
%(z)
s)[1] *
PyArray_DIMS(
%(z)
s)[2] *
PyArray_DIMS(
%(z)
s)[3] *
(npy_intp)sizeof(
%(type)
s))
%(fail)
s;
if (PyArray_STRIDES(
%(z)
s)[1] != PyArray_DIMS(
%(z)
s)[2] *
PyArray_DIMS(
%(z)
s)[3] *
(npy_intp)sizeof(
%(type)
s))
%(fail)
s;
if (PyArray_STRIDES(
%(z)
s)[2] != PyArray_DIMS(
%(z)
s)[3] * (npy_intp)sizeof(
%(type)
s))
%(fail)
s;
if (PyArray_STRIDES(
%(z)
s)[3] != (npy_intp)sizeof(
%(type)
s))
if (!PyArray_ISCONTIGUOUS(
%(z)
s))
{
PyErr_SetString(PyExc_AssertionError, "Output (
%(z)
s) not contiguous");
%(fail)
s;
}
for(int b=0;b<
%(self_bsize)
s;b++){
for(int n_kern=0;n_kern<
%(self_nkern)
s;n_kern++){
...
...
@@ -1862,14 +1864,24 @@ typenum_f = PyArray_ObjectType((PyObject*)%(filtersflipped)s, 0);
if (typenum < 0) {PyErr_SetString(PyExc_ValueError, "Invalid type");
%(fail)
s;}
if (typenum != typenum_f) {PyErr_SetString(PyExc_ValueError, "Input types must match");
%(fail)
s;}
if (!img2d)
%(fail)
s;
if (!filtersflipped)
%(fail)
s;
if (!img2d)
{
PyErr_SetString(PyExc_AssertionError, "!img2d");
%(fail)
s;
}
if (!filtersflipped)
{
PyErr_SetString(PyExc_AssertionError, "!filtersflipped");
%(fail)
s;
}
if ((!
%(z)
s)
|| *PyArray_DIMS(
%(z)
s)!=4
||(PyArray_DIMS(
%(z)
s)[0] !=
%(self_bsize)
s)
||(PyArray_DIMS(
%(z)
s)[1] !=
%(self_nkern)
s)
||(PyArray_DIMS(
%(z)
s)[2] != dim_zz[0])
|| (PyArray_DIMS(
%(z)
s)[3] != dim_zz[1])
||(PyArray_DIMS(
%(z)
s)[3] != dim_zz[1])
||!PyArray_ISCONTIGUOUS(
%(z)
s)
)
{
{Py_XDECREF(
%(z)
s);}
...
...
@@ -1889,10 +1901,11 @@ Os[0]=%(self_outshp0)s;
Os[1]=
%(self_outshp1)
s;
//assertions
if (PyArray_STRIDES(
%(z)
s)[0] != PyArray_DIMS(
%(z)
s)[1] *PyArray_DIMS(
%(z)
s)[2] *PyArray_DIMS(
%(z)
s)[3] * (npy_intp)sizeof(
%(type)
s))
%(fail)
s;
if (PyArray_STRIDES(
%(z)
s)[1] != PyArray_DIMS(
%(z)
s)[2] * PyArray_DIMS(
%(z)
s)[3] * (npy_intp)sizeof(
%(type)
s))
%(fail)
s;
if (PyArray_STRIDES(
%(z)
s)[2] != PyArray_DIMS(
%(z)
s)[3] * (npy_intp)sizeof(
%(type)
s))
%(fail)
s;
if (PyArray_STRIDES(
%(z)
s)[3] != (npy_intp)sizeof(
%(type)
s))
%(fail)
s;
if (!PyArray_ISCONTIGUOUS(
%(z)
s))
{
PyErr_SetString(PyExc_AssertionError, "Output (
%(z)
s) not contiguous");
%(fail)
s;
}
for(int b=0;b<
%(self_bsize)
s ;b+=
%(unroll_bsize)
s){
for(int n_kern=0;n_kern<
%(self_nkern)
s;n_kern+=
%(unroll_ksize)
s){
...
...
theano/tensor/opt.py
浏览文件 @
3dc94e90
...
...
@@ -1185,6 +1185,9 @@ def local_subtensor_make_vector(node):
# if it is a constant we can do something with it
try
:
v
=
get_scalar_constant_value
(
idx
)
if
isinstance
(
v
,
numpy
.
integer
):
# Python 2.4 wants to index only with Python integers
v
=
int
(
v
)
return
[
x
.
owner
.
inputs
[
v
]]
except
NotScalarConstantError
:
pass
...
...
theano/tensor/tests/test_basic.py
浏览文件 @
3dc94e90
...
...
@@ -417,8 +417,8 @@ def makeTester(name, op, expected, checks=None, good=None, bad_build=None,
def
rand
(
*
shape
):
r
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
*
shape
),
dtype
=
config
.
floatX
)
return
r
*
2
-
1
r
=
numpy
.
random
.
rand
(
*
shape
)
*
2
-
1
return
numpy
.
asarray
(
r
,
dtype
=
config
.
floatX
)
def
rand_nonzero
(
shape
,
eps
=
3e-4
):
...
...
@@ -4270,8 +4270,9 @@ class t_dot(unittest.TestCase):
return
type
(
x
),
x
.
dtype
,
x
.
shape
nz
=
numpy
.
dot
(
x
,
y
)
tz
=
eval_outputs
([
dot
(
as_tensor_variable
(
x
),
as_tensor_variable
(
y
))])
self
.
assertTrue
(
tz
.
dtype
==
nz
.
dtype
)
self
.
assertTrue
(
tz
.
shape
==
nz
.
shape
)
self
.
assertTrue
(
tz
.
dtype
==
nz
.
dtype
,
(
tz
.
dtype
,
tz
.
dtype
.
num
,
nz
.
dtype
,
nz
.
dtype
.
num
))
self
.
assertTrue
(
tz
.
shape
==
nz
.
shape
,
(
tz
.
shape
,
nz
.
shape
))
self
.
assertTrue
(
_approx_eq
(
nz
,
tz
))
def
test_Op_dims
(
self
):
...
...
@@ -4300,19 +4301,19 @@ class t_dot(unittest.TestCase):
self
.
assertRaises
(
TypeError
,
_dot
,
d3
,
d3
)
def
test_dot_0d_0d
(
self
):
self
.
cmp_dot
(
1.1
,
2.2
)
self
.
cmp_dot
(
rand
(),
rand
()
)
def
test_dot_0d_1d
(
self
):
self
.
cmp_dot
(
1.1
,
rand
(
5
))
self
.
cmp_dot
(
rand
()
,
rand
(
5
))
def
test_dot_0d_2d
(
self
):
self
.
cmp_dot
(
3.0
,
rand
(
6
,
7
))
self
.
cmp_dot
(
rand
()
,
rand
(
6
,
7
))
def
test_dot_0d_3d
(
self
):
self
.
cmp_dot
(
3.0
,
rand
(
8
,
6
,
7
))
self
.
cmp_dot
(
rand
()
,
rand
(
8
,
6
,
7
))
def
test_dot_1d_0d
(
self
):
self
.
cmp_dot
(
rand
(
5
),
1.1
)
self
.
cmp_dot
(
rand
(
5
),
rand
()
)
def
test_dot_1d_1d
(
self
):
self
.
cmp_dot
(
rand
(
5
),
rand
(
5
))
...
...
@@ -4344,7 +4345,7 @@ class t_dot(unittest.TestCase):
self
.
cmp_dot
(
rand
(
6
),
rand
(
8
,
6
,
7
))
def
test_dot_2d_0d
(
self
):
self
.
cmp_dot
(
rand
(
5
,
6
),
1.0
)
self
.
cmp_dot
(
rand
(
5
,
6
),
rand
()
)
def
test_dot_2d_1d
(
self
):
self
.
cmp_dot
(
rand
(
5
,
6
),
rand
(
6
))
...
...
@@ -4380,7 +4381,7 @@ class t_dot(unittest.TestCase):
self
.
cmp_dot
(
rand
(
5
,
6
),
rand
(
8
,
6
,
7
))
def
test_dot_3d_0d
(
self
):
self
.
cmp_dot
(
rand
(
4
,
5
,
6
),
1.0
)
self
.
cmp_dot
(
rand
(
4
,
5
,
6
),
rand
()
)
def
test_dot_3d_1d
(
self
):
self
.
cmp_dot
(
rand
(
4
,
5
,
6
),
rand
(
6
))
...
...
theano/tensor/tests/test_blas.py
浏览文件 @
3dc94e90
...
...
@@ -5,6 +5,7 @@ import sys
import
theano.tensor
as
T
from
theano
import
tensor
from
theano.gof.python25
import
product
as
itertools_product
from
theano.gof.python25
import
any
from
theano.printing
import
pp
import
numpy
...
...
@@ -857,7 +858,6 @@ def test_dot22():
assert
_dot22
in
[
x
.
op
for
x
in
topo
],
(
dtype1
,
dtype2
)
else
:
check
=
[
isinstance
(
x
.
op
,
T
.
Dot
)
for
x
in
topo
]
from
theano.gof.python25
import
any
assert
any
(
check
),
(
dtype1
,
dtype2
)
rng
=
numpy
.
random
.
RandomState
(
unittest_tools
.
fetch_seed
())
...
...
@@ -1603,6 +1603,13 @@ class TestGer(TestCase, unittest_tools.TestOptimizationMixin):
self
.
A
,
self
.
a
,
self
.
x
.
dimshuffle
(
0
,
'x'
),
self
.
y
.
dimshuffle
(
'x'
,
0
),
self
.
b
(
1.5
))
.
owner
)
def
test_b_nonconst_does_not_triggers_ger
(
self
):
""" test local_gemm_to_ger opt"""
assert
not
T
.
blas
.
local_gemm_to_ger
.
transform
(
gemm_no_inplace
(
self
.
A
,
self
.
a
,
self
.
x
.
dimshuffle
(
0
,
'x'
),
self
.
y
.
dimshuffle
(
'x'
,
0
),
self
.
a
)
.
owner
)
def
test_outer
(
self
):
f
=
self
.
function
([
self
.
x
,
self
.
y
],
T
.
outer
(
self
.
x
,
self
.
y
))
self
.
assertFunctionContains
(
f
,
self
.
ger_destructive
)
...
...
theano/tests/run_tests_in_batch.py
浏览文件 @
3dc94e90
...
...
@@ -101,7 +101,7 @@ def main(stdout=None, stderr=None, argv=None, theano_nose=None,
theano_nose
=
path
break
if
theano_nose
is
None
:
raise
Exception
(
"Not able to find theano
_
nose"
)
raise
Exception
(
"Not able to find theano
-
nose"
)
if
batch_size
is
None
:
batch_size
=
100
stdout_backup
=
sys
.
stdout
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论