Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
23e43b1b
提交
23e43b1b
authored
10月 03, 2016
作者:
Frederic Bastien
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Change the prepare_node logic to make it safe to call make_py_thunk and make_c_thunk directly.
上级
179e4085
显示空白字符变更
内嵌
并排
正在显示
28 个修改的文件
包含
87 行增加
和
113 行删除
+87
-113
extending_theano.txt
doc/extending/extending_theano.txt
+4
-3
op.txt
doc/extending/op.txt
+3
-1
gpu_data_convert.txt
doc/tutorial/gpu_data_convert.txt
+1
-1
using_gpu.txt
doc/tutorial/using_gpu.txt
+1
-1
builders.py
theano/compile/builders.py
+2
-2
debugmode.py
theano/compile/debugmode.py
+3
-2
cc.py
theano/gof/cc.py
+3
-12
link.py
theano/gof/link.py
+3
-2
op.py
theano/gof/op.py
+24
-17
test_lazy.py
theano/gof/tests/test_lazy.py
+2
-2
vm.py
theano/gof/vm.py
+4
-6
elemwise.py
theano/gpuarray/elemwise.py
+1
-1
fft.py
theano/gpuarray/fft.py
+2
-2
ifelse.py
theano/ifelse.py
+1
-1
pycuda_example.py
theano/misc/pycuda_example.py
+1
-1
__init__.py
theano/sandbox/cuda/__init__.py
+1
-1
blas.py
theano/sandbox/cuda/blas.py
+1
-1
cula.py
theano/sandbox/cuda/cula.py
+1
-4
dnn.py
theano/sandbox/cuda/dnn.py
+4
-3
extra_ops.py
theano/sandbox/cuda/extra_ops.py
+2
-10
fftconv.py
theano/sandbox/cuda/fftconv.py
+4
-4
basic.py
theano/scalar/basic.py
+4
-2
scan_op.py
theano/scan_module/scan_op.py
+4
-11
blas.py
theano/tensor/blas.py
+0
-3
blas_scipy.py
theano/tensor/blas_scipy.py
+1
-1
elemwise.py
theano/tensor/elemwise.py
+5
-11
opt.py
theano/tensor/opt.py
+3
-6
pool.py
theano/tensor/signal/pool.py
+2
-2
没有找到文件。
doc/extending/extending_theano.txt
浏览文件 @
23e43b1b
...
...
@@ -99,7 +99,7 @@ possibilities you may encounter or need. For that refer to
pass
# Other implementations (pycuda, ...):
def make_thunk(self, node, storage_map, _, _2):
def make_thunk(self, node, storage_map, _, _2
, impl=None
):
pass
# optional:
...
...
@@ -190,11 +190,12 @@ or :func:`make_thunk`.
valid, but shouldn't be required anymore for this call.
The returned function must ensure that it sets the computed
variables as computed in the `compute_map`.
- ``impl`` allows selecting between multiple implementations.
It should have a default value of None.
:func:`make_thunk` is useful if you want to generate code and compile
it yourself. For example, this allows you to use PyCUDA to compile GPU
code.
code
and keep state in the thunk
.
If :func:`make_thunk()` is defined by an op, it will be used by Theano
to obtain the op's implementation.
...
...
doc/extending/op.txt
浏览文件 @
23e43b1b
...
...
@@ -171,7 +171,7 @@ Optional methods or attributes
returned, unless it is of length 1, where the single element will be
returned by itself.
.. function:: make_thunk(node, storage_map, compute_map, no_recycling)
.. function:: make_thunk(node, storage_map, compute_map, no_recycling
, impl=None
)
This function must return a thunk, that is a zero-arguments
function that encapsulates the computation to be performed by this
...
...
@@ -192,6 +192,8 @@ Optional methods or attributes
valid, but shouldn't be required anymore for this call.
:param no_recycling: WRITEME
WRITEME
:param impl: None, 'c' or 'py'
Which implementation to use.
The returned function must ensure that it sets the computed
variables as computed in the `compute_map`.
...
...
doc/tutorial/gpu_data_convert.txt
浏览文件 @
23e43b1b
...
...
@@ -92,7 +92,7 @@ You can use a GPU function compiled with PyCUDA in a Theano op:
cuda.basic_ops.as_cuda_ndarray_variable(inp))
assert inp.dtype == "float32"
return theano.Apply(self, [inp], [inp.type()])
def make_thunk(self, node, storage_map, _, _2):
def make_thunk(self, node, storage_map, _, _2
, impl=None
):
mod = SourceModule("""
__global__ void my_fct(float * i0, float * o0, int size) {
int i = blockIdx.x * blockDim.x + threadIdx.x;
...
...
doc/tutorial/using_gpu.txt
浏览文件 @
23e43b1b
...
...
@@ -586,7 +586,7 @@ Modify and execute to work for a matrix of shape (20, 10).
assert inp.dtype == "float32"
return theano.Apply(self, [inp], [inp.type()])
def make_thunk(self, node, storage_map, _, _2):
def make_thunk(self, node, storage_map, _, _2
, impl
):
mod = SourceModule("""
__global__ void my_fct(float * i0, float * o0, int size) {
int i = blockIdx.x*blockDim.x + threadIdx.x;
...
...
theano/compile/builders.py
浏览文件 @
23e43b1b
...
...
@@ -124,8 +124,8 @@ class OpFromGraph(gof.Op):
list
(
inputs
)
+
self
.
shared_inputs
,
[
type
()
for
type
in
self
.
output_types
])
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
):
if
not
hasattr
(
node
.
tag
,
"fn"
):
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
,
impl
):
if
not
hasattr
(
node
.
tag
,
"fn"
)
and
impl
==
'py'
:
node
.
tag
.
fn
=
orig_function
(
self
.
new_inputs
,
self
.
new_outputs
,
**
self
.
kwargs
)
...
...
theano/compile/debugmode.py
浏览文件 @
23e43b1b
...
...
@@ -1837,8 +1837,6 @@ class _Linker(gof.link.LocalLinker):
thunk
.
inputs
=
[
storage_map
[
v
]
for
v
in
node
.
inputs
]
thunk
.
outputs
=
[
storage_map
[
v
]
for
v
in
node
.
outputs
]
thunk_other
=
thunk
else
:
node
.
op
.
prepare_node
(
node
,
storage_map
,
compute_map
)
debug
=
hasattr
(
node
.
op
,
'debug_perform'
)
...
...
@@ -1852,6 +1850,7 @@ class _Linker(gof.link.LocalLinker):
if
not
isinstance
(
node
.
op
,
gof
.
op
.
Op
):
raise
utils
.
MethodNotDefined
()
node
.
op
.
prepare_node
(
node
,
storage_map
,
compute_map
,
'c'
)
thunk
=
node
.
op
.
make_c_thunk
(
node
,
storage_map
,
compute_map
,
no_recycling
)
thunks_c
.
append
(
thunk
)
...
...
@@ -1864,6 +1863,7 @@ class _Linker(gof.link.LocalLinker):
if
(((
self
.
maker
.
mode
.
check_py_code
or
thunks_c
[
-
1
]
is
None
)
and
node
.
op
.
perform
.
__code__
!=
gof
.
op
.
PureOp
.
perform
.
__code__
)
or
debug
):
node
.
op
.
prepare_node
(
node
,
storage_map
,
compute_map
,
'py'
)
thunk
=
node
.
op
.
make_py_thunk
(
node
,
storage_map
,
compute_map
,
no_recycling
,
debug
=
debug
)
thunks_py
.
append
(
thunk
)
...
...
@@ -1873,6 +1873,7 @@ class _Linker(gof.link.LocalLinker):
if
not
self
.
maker
.
mode
.
check_c_code
and
thunks_py
[
-
1
]
is
None
:
_logger
.
warn
(
"Op
%
s doesn't have a perform, "
"forcing check of the C code"
%
node
.
op
)
node
.
op
.
prepare_node
(
node
,
storage_map
,
compute_map
,
'c'
)
thunk
=
node
.
op
.
make_c_thunk
(
node
,
storage_map
,
compute_map
,
no_recycling
)
thunks_c
[
-
1
]
=
thunk
...
...
theano/gof/cc.py
浏览文件 @
23e43b1b
...
...
@@ -1584,7 +1584,7 @@ class CLinker(link.Linker):
else
:
# Set compute_map as None as clinker do not support lazy evaluation
for
node
in
self
.
node_order
:
node
.
op
.
prepare_node
(
node
,
storage_map
,
None
)
node
.
op
.
prepare_node
(
node
,
storage_map
,
None
,
'c'
)
module
=
get_module_cache
()
.
module_from_key
(
key
=
key
,
lnk
=
self
,
keep_lock
=
keep_lock
)
...
...
@@ -1787,17 +1787,8 @@ class OpWiseCLinker(link.LocalLinker):
thunks
=
[]
for
node
in
order
:
# Maker sure we use the C version of the code whenever
# possible
# There are ops that don't have _op_use_c_code property
# for example ifelse (or any ops that come with their own
# make_thunk
if
theano
.
config
.
cxx
:
thunks
+=
[
node
.
op
.
make_c_thunk
(
node
,
storage_map
,
compute_map
,
no_recycling
)]
else
:
# make_thunk will try by default C code, otherwise
# it fall back to python.
thunks
+=
[
node
.
op
.
make_thunk
(
node
,
storage_map
,
compute_map
,
...
...
theano/gof/link.py
浏览文件 @
23e43b1b
...
...
@@ -823,10 +823,11 @@ class PerformLinker(LocalLinker):
# the python version
# Note : ops that implement their own make thunk don't usually
# have this attribute defined !!
thunks
+=
[
node
.
op
.
make_
py_
thunk
(
node
,
thunks
+=
[
node
.
op
.
make_thunk
(
node
,
storage_map
,
compute_map
,
no_recycling
)]
no_recycling
,
'py'
)]
thunks
[
-
1
]
.
inputs
=
[
storage_map
[
v
]
for
v
in
node
.
inputs
]
thunks
[
-
1
]
.
outputs
=
[
storage_map
[
v
]
for
v
in
node
.
outputs
]
...
...
theano/gof/op.py
浏览文件 @
23e43b1b
...
...
@@ -792,19 +792,22 @@ class Op(utils.object2, PureOp, CLinkerOp):
def
__init__
(
self
,
use_c_code
=
theano
.
config
.
cxx
):
self
.
_op_use_c_code
=
use_c_code
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
):
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
,
impl
):
"""
Make any special modifications that the Op needs before doing
make_thunk().
This can modify the node inplace and should return nothing.
It can be called multiple times with different impl. It is the
op's responsibility not to re-prepare the node when it isn't
good to do so.
"""
pass
def
make_c_thunk
(
self
,
node
,
storage_map
,
compute_map
,
no_recycling
):
"""
Like make_thunk, but will only try to make a C thunk.
"""Like make_thunk, but will only try to make a C thunk.
"""
node_input_storage
=
[
storage_map
[
r
]
for
r
in
node
.
inputs
]
...
...
@@ -883,7 +886,8 @@ class Op(utils.object2, PureOp, CLinkerOp):
rval
.
lazy
=
False
return
rval
def
make_thunk
(
self
,
node
,
storage_map
,
compute_map
,
no_recycling
):
def
make_thunk
(
self
,
node
,
storage_map
,
compute_map
,
no_recycling
,
impl
=
None
):
"""
This function must return a thunk, that is a zero-arguments
function that encapsulates the computation to be performed
...
...
@@ -904,6 +908,9 @@ class Op(utils.object2, PureOp, CLinkerOp):
no_recycling
List of variables for which it is forbidden to reuse memory
allocated by a previous call.
impl
Currently, None, 'c' or 'py'. If 'c' or 'py' we will only try
that version of the code.
Notes
-----
...
...
@@ -913,26 +920,26 @@ class Op(utils.object2, PureOp, CLinkerOp):
the thunk can potentially cache return values (like CLinker does),
then it must not do so for variables in the no_recycling list.
self.prepare_node(node, ...) is always called. If we try 'c' and it
fails and we try again 'py', prepare_node will be called twice.
"""
if
impl
is
None
or
impl
==
'c'
:
self
.
prepare_node
(
node
,
storage_map
=
storage_map
,
compute_map
=
compute_map
)
if
not
hasattr
(
self
,
'_op_use_c_code'
):
warnings
.
warn
(
"The __getstate__ method of '
%
s' is not implemented correctly."
" It should keep the attributes added by the base class."
" To implement it correctly, it should keep all attributes"
" and only remove those it does not want."
%
(
self
),
stacklevel
=
2
)
if
getattr
(
self
,
'_op_use_c_code'
,
theano
.
config
.
cxx
):
compute_map
=
compute_map
,
impl
=
'c'
)
try
:
return
self
.
make_c_thunk
(
node
,
storage_map
,
compute_map
,
no_recycling
)
except
(
NotImplementedError
,
utils
.
MethodNotDefined
):
# We requested the c code, so don't catch the error.
if
impl
==
'c'
:
raise
_logger
.
debug
(
'Falling back on perform'
)
# condition: either there was no c_code, or it failed
# condition: either there was no c_code, or it failed or
# python code was requested.
self
.
prepare_node
(
node
,
storage_map
=
storage_map
,
compute_map
=
compute_map
,
impl
=
'py'
)
return
self
.
make_py_thunk
(
node
,
storage_map
,
compute_map
,
no_recycling
)
def
make_node
(
self
,
*
inputs
):
...
...
@@ -1195,8 +1202,8 @@ int main( int argc, const char* argv[] )
self
.
openmp
=
False
theano
.
config
.
openmp
=
False
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
)
:
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
,
impl
):
if
impl
==
'c'
:
self
.
update_self_openmp
()
...
...
theano/gof/tests/test_lazy.py
浏览文件 @
23e43b1b
...
...
@@ -25,7 +25,7 @@ class IfElseIfElseIf(PureOp):
assert
t3
.
type
==
f3
.
type
return
Apply
(
self
,
[
c1
,
t1
,
c2
,
t2
,
c3
,
t3
,
f3
],
[
t1
.
type
()])
def
make_thunk
(
self
,
node
,
storage_map
,
compute_map
,
no_recycling
):
def
make_thunk
(
self
,
node
,
storage_map
,
compute_map
,
no_recycling
,
impl
):
input_computed
=
[
compute_map
[
v
]
for
v
in
node
.
inputs
]
output_computed
=
[
compute_map
[
v
]
for
v
in
node
.
outputs
]
...
...
@@ -93,7 +93,7 @@ class NotImplementedOp(PureOp):
def
make_node
(
self
,
x
):
return
Apply
(
self
,
[
x
],
[
x
.
type
()])
def
make_thunk
(
self
,
node
,
storage_map
,
compute_map
,
no_recycling
):
def
make_thunk
(
self
,
node
,
storage_map
,
compute_map
,
no_recycling
,
impl
):
def
thunk
():
raise
self
.
E
()
thunk
.
lazy
=
False
...
...
theano/gof/vm.py
浏览文件 @
23e43b1b
...
...
@@ -1043,16 +1043,14 @@ class VM_Linker(link.LocalLinker):
t0
=
time
.
time
()
for
node
in
order
:
try
:
impl
=
None
if
self
.
c_thunks
is
False
:
thunks
.
append
(
node
.
op
.
make_py_thunk
(
node
,
storage_map
,
compute_map
,
no_recycling
))
else
:
impl
=
'py'
thunks
.
append
(
node
.
op
.
make_thunk
(
node
,
storage_map
,
compute_map
,
no_recycling
))
no_recycling
,
impl
=
impl
))
if
not
hasattr
(
thunks
[
-
1
],
'lazy'
):
# We don't want all ops maker to think about lazy Ops.
# So if they didn't specify that its lazy or not, it isn't.
...
...
theano/gpuarray/elemwise.py
浏览文件 @
23e43b1b
...
...
@@ -2640,7 +2640,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
def
get_params
(
self
,
node
):
return
node
.
outputs
[
0
]
.
type
.
context
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
):
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
,
impl
):
# cache the kernel object
self
.
get_kernel_cache
(
node
)
...
...
theano/gpuarray/fft.py
浏览文件 @
23e43b1b
...
...
@@ -73,7 +73,7 @@ class CuRFFTOp(Op):
return
theano
.
Apply
(
self
,
[
inp
,
s
],
[
self
.
output_type
(
inp
)()])
def
make_thunk
(
self
,
node
,
storage_map
,
_
,
_2
):
def
make_thunk
(
self
,
node
,
storage_map
,
_
,
_2
,
impl
=
None
):
inputs
=
[
storage_map
[
v
]
for
v
in
node
.
inputs
]
outputs
=
[
storage_map
[
v
]
for
v
in
node
.
outputs
]
...
...
@@ -198,7 +198,7 @@ class CuIRFFTOp(Op):
return
theano
.
Apply
(
self
,
[
inp
,
s
],
[
self
.
output_type
(
inp
)()])
def
make_thunk
(
self
,
node
,
storage_map
,
_
,
_2
):
def
make_thunk
(
self
,
node
,
storage_map
,
_
,
_2
,
impl
=
None
):
inputs
=
[
storage_map
[
v
]
for
v
in
node
.
inputs
]
outputs
=
[
storage_map
[
v
]
for
v
in
node
.
outputs
]
...
...
theano/ifelse.py
浏览文件 @
23e43b1b
...
...
@@ -235,7 +235,7 @@ class IfElse(Op):
if_true_op
(
*
if_true
,
**
dict
(
return_list
=
True
))
+
if_false_op
(
*
if_false
,
**
dict
(
return_list
=
True
)))
def
make_
py_thunk
(
self
,
node
,
storage_map
,
compute_map
,
no_recycling
):
def
make_
thunk
(
self
,
node
,
storage_map
,
compute_map
,
no_recycling
,
impl
=
None
):
cond
=
node
.
inputs
[
0
]
ts
=
node
.
inputs
[
1
:][:
self
.
n_outs
]
fs
=
node
.
inputs
[
1
:][
self
.
n_outs
:]
...
...
theano/misc/pycuda_example.py
浏览文件 @
23e43b1b
...
...
@@ -320,7 +320,7 @@ class PycudaElemwiseSourceModuleMakeThunkOp(Op):
out_node
=
Apply
(
self
,
_inputs
,
[
otype
()
for
o
in
xrange
(
self
.
nout
)])
return
out_node
def
make_thunk
(
self
,
node
,
storage_map
,
_
,
_2
):
def
make_thunk
(
self
,
node
,
storage_map
,
_
,
_2
,
impl
=
None
):
# TODO support broadcast!
# TODO assert all input have the same shape
fct_name
=
"pycuda_elemwise_
%
s"
%
str
(
self
.
scalar_op
)
...
...
theano/sandbox/cuda/__init__.py
浏览文件 @
23e43b1b
...
...
@@ -246,7 +246,7 @@ class GpuOp(theano.gof.Op):
"""
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
):
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
,
impl
):
if
use
.
device_number
is
None
:
use
(
"gpu"
,
force
=
True
,
...
...
theano/sandbox/cuda/blas.py
浏览文件 @
23e43b1b
...
...
@@ -2119,7 +2119,7 @@ class GpuConv(GpuOp):
images
[
2
]
*
images
[
3
]
*
2
)
return
flops
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
):
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
,
impl
):
super
(
GpuConv
,
self
)
.
prepare_node
(
node
,
storage_map
,
compute_map
,
impl
)
if
node
.
op
.
max_threads_dim0
is
None
:
...
...
theano/sandbox/cuda/cula.py
浏览文件 @
23e43b1b
...
...
@@ -51,10 +51,7 @@ class GpuSolve(GpuOp):
assert
inp2
.
ndim
==
2
return
theano
.
Apply
(
self
,
[
inp1
,
inp2
],
[
self
.
output_type
(
inp1
)()])
def
make_thunk
(
self
,
node
,
storage_map
,
_
,
no_recycling
=
[]):
def
make_thunk
(
self
,
node
,
storage_map
,
_
,
no_recycling
,
impl
=
None
):
# Initialize CULA the first time it is needed
global
cula_initialized
...
...
theano/sandbox/cuda/dnn.py
浏览文件 @
23e43b1b
...
...
@@ -1512,8 +1512,9 @@ class GpuDnnPool(DnnBase):
assert
mode
in
(
'max'
,
'average_inc_pad'
,
'average_exc_pad'
)
self
.
mode
=
mode
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
):
super
(
GpuDnnPool
,
self
)
.
prepare_node
(
node
,
storage_map
,
compute_map
)
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
,
impl
):
super
(
GpuDnnPool
,
self
)
.
prepare_node
(
node
,
storage_map
,
compute_map
,
impl
)
if
len
(
node
.
inputs
)
==
2
:
warnings
.
warn
(
"Theano GPUDnnPoolGrad internal changed."
,
stacklevel
=
3
)
...
...
@@ -1752,7 +1753,7 @@ class GpuDnnPoolGrad(DnnBase):
assert
mode
in
(
'max'
,
'average_inc_pad'
,
'average_exc_pad'
)
self
.
mode
=
mode
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
):
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
,
impl
):
if
len
(
node
.
inputs
)
==
4
:
warnings
.
warn
(
"Theano GPUDnnPoolGrad internal changed."
,
stacklevel
=
3
)
# Old interface
...
...
theano/sandbox/cuda/extra_ops.py
浏览文件 @
23e43b1b
...
...
@@ -49,20 +49,12 @@ class GpuCumsum(CumsumOp, GpuOp):
return
theano
.
Apply
(
self
,
[
x
],
[
x
.
type
()])
def
make_thunk
(
self
,
node
,
storage_map
,
compute_map
,
no_recycling
):
def
make_thunk
(
self
,
node
,
storage_map
,
compute_map
,
no_recycling
,
impl
=
None
):
node_
=
copy
.
copy
(
node
)
assert
node
.
op
is
node_
.
op
if
node_
.
op
.
max_threads_dim0
is
None
or
node_
.
op
.
max_grid_size1
is
None
or
node_
.
op
.
max_grid_size2
is
None
:
cuda
=
theano
.
sandbox
.
cuda
device_id
=
cuda
.
use
.
device_number
if
device_id
is
None
:
cuda
.
use
(
"gpu"
,
force
=
False
,
default_to_move_computation_to_gpu
=
False
,
move_shared_float32_to_gpu
=
False
,
enable_cuda
=
False
,
test_driver
=
True
)
device_id
=
cuda
.
use
.
device_number
cuda_ndarray
=
theano
.
sandbox
.
cuda
.
cuda_ndarray
.
cuda_ndarray
prop
=
cuda_ndarray
.
device_properties
(
device_id
)
node_
.
op
.
max_threads_dim0
=
prop
[
'maxThreadsDim0'
]
...
...
@@ -70,7 +62,7 @@ class GpuCumsum(CumsumOp, GpuOp):
node_
.
op
.
max_grid_size2
=
prop
[
'maxGridSize2'
]
return
super
(
GpuCumsum
,
node_
.
op
)
.
make_thunk
(
node_
,
storage_map
,
compute_map
,
no_recycling
)
compute_map
,
no_recycling
,
impl
)
def
__str__
(
self
):
return
"
%
s{
%
s}"
%
(
self
.
__class__
.
__name__
,
self
.
axis
)
...
...
theano/sandbox/cuda/fftconv.py
浏览文件 @
23e43b1b
...
...
@@ -48,7 +48,7 @@ class ScikitsCudaOp(GpuOp):
return
theano
.
Apply
(
self
,
[
inp
],
[
self
.
output_type
(
inp
)()])
def
make_thunk
(
self
,
node
,
storage_map
,
_
,
_2
):
def
make_thunk
(
self
,
node
,
storage_map
,
_
,
_2
,
impl
=
None
):
if
not
scikits_cuda_available
:
raise
RuntimeError
(
"scikits.cuda is needed for all GPU fft implementation,"
...
...
@@ -61,7 +61,7 @@ class CuFFTOp(ScikitsCudaOp):
return
CudaNdarrayType
(
broadcastable
=
[
False
]
*
(
inp
.
type
.
ndim
+
1
))
def
make_thunk
(
self
,
node
,
storage_map
,
_
,
_2
):
def
make_thunk
(
self
,
node
,
storage_map
,
_
,
_2
,
impl
=
None
):
super
(
CuFFTOp
,
self
)
.
make_thunk
(
node
,
storage_map
,
_
,
_2
)
from
theano.misc.pycuda_utils
import
to_gpuarray
...
...
@@ -118,7 +118,7 @@ class CuIFFTOp(ScikitsCudaOp):
return
CudaNdarrayType
(
broadcastable
=
[
False
]
*
(
inp
.
type
.
ndim
-
1
))
def
make_thunk
(
self
,
node
,
storage_map
,
_
,
_2
):
def
make_thunk
(
self
,
node
,
storage_map
,
_
,
_2
,
impl
=
None
):
super
(
CuIFFTOp
,
self
)
.
make_thunk
(
node
,
storage_map
,
_
,
_2
)
from
theano.misc.pycuda_utils
import
to_gpuarray
...
...
@@ -314,7 +314,7 @@ class BatchedComplexDotOp(ScikitsCudaOp):
def
output_type
(
self
,
inp
):
return
CudaNdarrayType
(
broadcastable
=
[
False
]
*
inp
.
type
.
ndim
)
def
make_thunk
(
self
,
node
,
storage_map
,
_
,
_2
):
def
make_thunk
(
self
,
node
,
storage_map
,
_
,
_2
,
impl
=
None
):
super
(
BatchedComplexDotOp
,
self
)
.
make_thunk
(
node
,
storage_map
,
_
,
_2
)
inputs
=
[
storage_map
[
v
]
for
v
in
node
.
inputs
]
...
...
theano/scalar/basic.py
浏览文件 @
23e43b1b
...
...
@@ -3664,10 +3664,12 @@ class Composite(ScalarOp):
# self.init_name() # self.name
self
.
name
=
None
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
):
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
,
impl
):
if
impl
==
'py'
:
self
.
init_py_impls
()
# self._impls
elif
impl
==
'c'
:
for
n
in
theano
.
gof
.
graph
.
list_of_nodes
(
self
.
inputs
,
self
.
outputs
):
n
.
op
.
prepare_node
(
n
,
None
,
None
)
n
.
op
.
prepare_node
(
n
,
None
,
None
,
impl
)
def
output_types
(
self
,
input_types
):
if
tuple
(
input_types
)
!=
self
.
inputs_type
:
...
...
theano/scan_module/scan_op.py
浏览文件 @
23e43b1b
...
...
@@ -698,7 +698,7 @@ class Scan(PureOp):
scan_utils
.
hash_listsDictsTuples
(
self
.
info
)))
def
make_thunk
(
self
,
node
,
storage_map
,
compute_map
,
no_recycling
,
python_exec
=
Fals
e
):
impl
=
Non
e
):
"""
Parameters
...
...
@@ -716,8 +716,8 @@ class Scan(PureOp):
no_recycling
List of variables for which it is forbidden to reuse memory
allocated by a previous call.
python_exec
I
f we want python execution.
impl
Use 'py' i
f we want python execution.
Notes
-----
If the thunk consults the storage_map on every call, it is safe
...
...
@@ -866,7 +866,7 @@ class Scan(PureOp):
for
out
in
self
.
fn
.
maker
.
fgraph
.
outputs
]
try
:
if
python_exec
is
True
:
if
impl
==
'py'
:
raise
theano
.
gof
.
cmodule
.
MissingGXX
cython_mintaps
=
numpy
.
asarray
(
self
.
mintaps
,
dtype
=
'int32'
)
cython_tap_array_len
=
\
...
...
@@ -965,13 +965,6 @@ class Scan(PureOp):
rval
.
lazy
=
False
return
rval
def
make_py_thunk
(
self
,
node
,
storage_map
,
compute_map
,
no_recycling
):
return
self
.
make_thunk
(
node
=
node
,
storage_map
=
storage_map
,
compute_map
=
compute_map
,
no_recycling
=
no_recycling
,
python_exec
=
True
)
def
inner_seqs
(
self
,
list_inputs
):
# Given the list of inner inputs this function grabs those
# corresponding to sequences
...
...
theano/tensor/blas.py
浏览文件 @
23e43b1b
...
...
@@ -297,9 +297,6 @@ class Ger(Op):
This interface to GER allows non-destructive operation on A via the
`destructive` argument to the constructor.
:TODO: Create better classes ScipyGer and CGer that inherit from this class
and override the make_thunk() method to use Scipy and C respectively.
"""
__props__
=
(
"destructive"
,)
...
...
theano/tensor/blas_scipy.py
浏览文件 @
23e43b1b
...
...
@@ -22,7 +22,7 @@ if have_fblas:
class
ScipyGer
(
Ger
):
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
):
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
,
impl
):
if
impl
==
'py'
:
node
.
tag
.
local_ger
=
_blas_ger_fns
[
numpy
.
dtype
(
node
.
inputs
[
0
]
.
type
.
dtype
)]
...
...
theano/tensor/elemwise.py
浏览文件 @
23e43b1b
...
...
@@ -787,14 +787,15 @@ second dimension
return
ret
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
):
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
,
impl
):
# Postpone the ufunc building to the last minutes
# NumPy ufunc support only up to 31 inputs.
# But our c code support more.
if
(
len
(
node
.
inputs
)
<
32
and
(
self
.
nfunc
is
None
or
self
.
scalar_op
.
nin
!=
len
(
node
.
inputs
))
and
self
.
ufunc
is
None
):
self
.
ufunc
is
None
and
impl
==
'py'
):
ufunc
=
numpy
.
frompyfunc
(
self
.
scalar_op
.
impl
,
len
(
node
.
inputs
),
...
...
@@ -830,7 +831,7 @@ second dimension
[
get_scalar_type
(
dtype
=
output
.
type
.
dtype
)
.
make_variable
()
for
output
in
node
.
outputs
])
self
.
scalar_op
.
prepare_node
(
node
.
tag
.
fake_node
,
None
,
None
)
self
.
scalar_op
.
prepare_node
(
node
.
tag
.
fake_node
,
None
,
None
,
impl
)
def
perform
(
self
,
node
,
inputs
,
output_storage
):
if
len
(
node
.
inputs
)
>=
32
:
...
...
@@ -891,13 +892,6 @@ second dimension
if
self
.
ufunc
:
ufunc
=
self
.
ufunc
else
:
if
not
hasattr
(
node
.
tag
,
'ufunc'
):
# It happen that make_thunk isn't called, like in
# get_scalar_constant_value
node
.
tag
.
ufunc
=
numpy
.
frompyfunc
(
self
.
scalar_op
.
impl
,
len
(
node
.
inputs
),
self
.
scalar_op
.
nout
)
ufunc
=
node
.
tag
.
ufunc
nout
=
ufunc
.
nout
...
...
@@ -977,7 +971,7 @@ second dimension
# To not request all of them to call prepare_node(), do it here.
# There is no harm if it gets called multiple times.
if
not
hasattr
(
node
.
tag
,
'fake_node'
):
self
.
prepare_node
(
node
,
None
,
None
)
self
.
prepare_node
(
node
,
None
,
None
,
'c'
)
_inames
=
inames
_onames
=
onames
...
...
theano/tensor/opt.py
浏览文件 @
23e43b1b
...
...
@@ -6295,15 +6295,12 @@ def constant_folding(node):
for
o
in
node
.
outputs
:
storage_map
[
o
]
=
[
None
]
compute_map
[
o
]
=
[
False
]
impl
=
None
if
(
hasattr
(
node
.
op
,
'python_constant_folding'
)
and
node
.
op
.
python_constant_folding
(
node
)):
thunk
=
node
.
op
.
make_py_thunk
(
node
,
storage_map
,
compute_map
,
[])
else
:
impl
=
'py'
thunk
=
node
.
op
.
make_thunk
(
node
,
storage_map
,
compute_map
,
no_recycling
=
[]
)
no_recycling
=
[],
impl
=
impl
)
required
=
thunk
()
assert
not
required
# a node whose inputs are all provided should always
...
...
theano/tensor/signal/pool.py
浏览文件 @
23e43b1b
...
...
@@ -241,7 +241,7 @@ class Pool(OpenMPOp):
" 'average_inc_pad' and 'average_exc_pad'. Got
%
s"
%
mode
)
self
.
mode
=
mode
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
):
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
,
impl
):
if
len
(
node
.
inputs
)
==
1
:
# Old interface
self
.
mode
=
node
.
op
.
mode
...
...
@@ -686,7 +686,7 @@ class PoolGrad(OpenMPOp):
self
.
mode
=
mode
super
(
PoolGrad
,
self
)
.
__init__
(
openmp
=
openmp
)
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
):
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
,
impl
):
if
len
(
node
.
inputs
)
<
5
:
# 5 for AveragePoolGrad, 6 for MaxPoolGrad
# Old interface
self
.
mode
=
node
.
op
.
mode
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论