提交 65f54e68 authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #2479 from carriepl/prevent_output_from_inplace

Feature for no outputs from inplace.
......@@ -159,6 +159,18 @@ class AddDestroyHandler(gof.Optimizer):
fgraph.attach_feature(gof.DestroyHandler())
class AddNoOutputFromInplace(gof.Optimizer):
    """Optimizer whose only job is to attach `gof.NoOutputFromInplace`.

    The attached feature prevents the outputs of a function graph from
    being produced by inplace operations on intermediary variables.  This
    matters when the outputs of the fgraph are preallocated, because it
    avoids useless copying of the data.  Currently, scan preallocates its
    outputs.
    """

    def add_requirements(self, fgraph):
        """Attach a fresh `NoOutputFromInplace` feature to `fgraph`.

        The parent class requirements are installed first, then the
        feature itself.
        """
        super(AddNoOutputFromInplace, self).add_requirements(fgraph)
        no_inplace_outputs = gof.NoOutputFromInplace()
        fgraph.attach_feature(no_inplace_outputs)
class PrintCurrentFunctionGraph(gof.Optimizer):
"""This optimizer is for debugging.
......@@ -211,6 +223,9 @@ optdb.register('specialize_device', gof.EquilibriumDB(),
# Second merge pass, registered at position 49 under the 'fast_run' and
# 'merge' tags.
optdb.register('merge2', gof.MergeOptimizer(),
49, 'fast_run', 'merge')
# Registered at 49.4 with no tags at all, unlike the entries around it.
# NOTE(review): presumably this means the optimizer is only enabled when it
# is requested explicitly by name (e.g. mode.including(
# "add_no_output_from_inplace")), and that 49.4 orders it just before
# 'add_destroy_handler' at 49.5 -- confirm against the optdb semantics.
optdb.register('add_no_output_from_inplace', AddNoOutputFromInplace(),
49.4)
# Attaches the DestroyHandler; registered at 49.5 under the 'fast_run' and
# 'inplace' tags.
optdb.register('add_destroy_handler', AddDestroyHandler(),
49.5, 'fast_run', 'inplace')
......
import theano
from theano.compile.mode import Mode
import theano.tensor as T
def test_no_output_from_implace():
    """Check that 'add_no_output_from_inplace' keeps the output not inplace.

    The same graph, tanh(dot(x, y)), is compiled twice: once with plain
    "FAST_RUN" and once with a fast_run mode that additionally includes
    "add_no_output_from_inplace".  Only the first compilation may produce
    its output through an op that destroys its input 0.
    """
    def output_is_inplace(compiled_fct):
        # True when the op producing the first graph output declares, via
        # its destroy_map, that output 0 is computed inplace.
        producer = compiled_fct.maker.fgraph.outputs[0].owner.op
        return hasattr(producer, 'destroy_map') and 0 in producer.destroy_map

    x = T.matrix()
    y = T.matrix()
    product = T.dot(x, y)
    result = T.tanh(product)

    # Ensure that the elemwise op that produces the output is inplace when
    # using a mode that does not include the optimization
    fct_no_opt = theano.function([x, y], result, mode="FAST_RUN")
    assert output_is_inplace(fct_no_opt)

    # Ensure that the elemwise op that produces the output is not inplace
    # when using a mode that includes the optimization
    base_mode = Mode(linker="cvm", optimizer="fast_run")
    mode_opt = base_mode.including("add_no_output_from_inplace")
    fct_opt = theano.function([x, y], result, mode=mode_opt)
    assert not output_is_inplace(fct_opt)
......@@ -74,7 +74,7 @@ from theano.gof.optdb import \
from theano.gof.toolbox import \
Feature, \
Bookkeeper, History, Validator, ReplaceValidate, NodeFinder,\
PrintListener, ReplacementDidntRemovedError
PrintListener, ReplacementDidntRemovedError, NoOutputFromInplace
from theano.gof.type import \
Type, Generic, generic
......
import sys
import time
import theano
from theano import config
from theano.gof.python25 import partial
from theano.gof.python25 import OrderedDict
......@@ -394,3 +395,26 @@ class PreserveNames(Feature):
new_r.name = r.name
class NoOutputFromInplace(Feature):
    """Feature that rejects graphs whose outputs are computed inplace.

    During validation, every output of the function graph is inspected; if
    the op that produces it lists that output in its `destroy_map`, the
    graph is declared inconsistent.
    """

    def validate(self, fgraph):
        """Raise if an output of `fgraph` is produced by an inplace op.

        :param fgraph: the function graph to check.

        :return: True when `fgraph` has no `destroyers` attribute (the
            check is skipped), None when every output passed the check.

        :raises theano.gof.InconsistencyError: if the op producing one of
            the outputs has that output's index in its `destroy_map`.
        """
        # NOTE(review): `destroyers` is presumably installed by the
        # DestroyHandler feature; without it the check is skipped -- confirm.
        if not hasattr(fgraph, 'destroyers'):
            return True

        for out in list(fgraph.outputs):

            # Outputs without an owner are not produced by any node, so
            # there is no op to inspect.
            if out.owner is None:
                continue

            # Validate that the node that produces the output does not produce
            # it by modifying something else inplace.
            node = out.owner
            op = node.op
            out_idx = node.outputs.index(out)
            if hasattr(op, 'destroy_map') and out_idx in op.destroy_map:
                # Build ONE message string.  Passing the fragments as
                # separate arguments would make the exception print as a
                # tuple of pieces instead of a readable sentence.
                raise theano.gof.InconsistencyError(
                    "A function graph Feature has requested (probably for "
                    "efficiency reasons for scan) that outputs of the graph "
                    "be prevented from being the result of inplace "
                    "operations. This has prevented output %s from "
                    "being computed by modifying another variable "
                    "inplace." % (out,))
......@@ -90,7 +90,7 @@ def test_consistency_randomstreams():
for use_cuda in test_use_cuda:
#print 'use_cuda =', use_cuda
samples = []
rng = MRG_RandomStreams(seed=seed, use_cuda=False)
rng = MRG_RandomStreams(seed=seed, use_cuda=use_cuda)
for i in range(n_streams):
stream_samples = []
u = rng.uniform(size=(n_substreams,), nstreams=n_substreams)
......
diff --git a/theano/scan_module/scan_perform.c b/theano/scan_module/scan_perform.c
index aaebb43..2d06b29 100644
--- a/theano/scan_module/scan_perform.c
+++ b/theano/scan_module/scan_perform.c
@@ -5595,7 +5595,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P
@@ -5597,7 +5597,7 @@
* cdef list stack
* cdef int offset
*/
......@@ -11,29 +7,29 @@ index aaebb43..2d06b29 100644
__Pyx_INCREF(__pyx_t_4);
__pyx_v_descr = ((PyArray_Descr *)__pyx_t_4);
__pyx_t_4 = 0;
@@ -7147,7 +7147,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a
@@ -7126,7 +7126,7 @@
* arr.base = baseptr
*
*/
- Py_XDECREF(__pyx_v_arr->base);
+ Py_XDECREF(PyArray_BASE(__pyx_v_arr));
/* "/home/anakha/.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":974
/* "numpy.pxd":973
* baseptr = <PyObject*>base
@@ -7156,7 +7156,11 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a
@@ -7135,7 +7135,11 @@
*
* cdef inline object get_array_base(ndarray arr):
*/
- __pyx_v_arr->base = __pyx_v_baseptr;
+#if NPY_API_VERSION < 0x00000007
+ PyArray_BASE(__pyx_v_arr) = __pyx_v_baseptr;
+#else
+ PyArray_SetBaseObject(__pyx_v_arr, __pyx_v_baseptr);
+#endif
+ #if NPY_API_VERSION < 0x00000007
+ PyArray_BASE(__pyx_v_arr) = __pyx_v_baseptr;
+ #else
+ PyArray_SetBaseObject(__pyx_v_arr, __pyx_v_baseptr);
+ #endif
/* "/home/anakha/.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":966
*
@@ -7191,7 +7195,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py
__Pyx_RefNannyFinishContext();
}
@@ -7161,7 +7165,7 @@
* return None
* else:
*/
......@@ -41,8 +37,8 @@ index aaebb43..2d06b29 100644
+ __pyx_t_1 = ((PyArray_BASE(__pyx_v_arr) == NULL) != 0);
if (__pyx_t_1) {
/* "/home/anakha/.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":978
@@ -7214,8 +7218,8 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py
/* "numpy.pxd":977
@@ -7185,8 +7189,8 @@
* return <object>arr.base # <<<<<<<<<<<<<<
*/
__Pyx_XDECREF(__pyx_r);
......@@ -52,4 +48,4 @@ index aaebb43..2d06b29 100644
+ __pyx_r = ((PyObject *)PyArray_BASE(__pyx_v_arr));
goto __pyx_L0;
}
__pyx_L3:;
......@@ -535,7 +535,7 @@ class Scan(PureOp):
self.n_sit_sot +
self.n_nit_sot)
wrapped_inputs = [Param(x, borrow=True) for x in self.inputs]
wrapped_outputs = [Out(x, borrow=False) for x in
wrapped_outputs = [Out(x, borrow=(x not in self.inputs)) for x in
self.outputs[:slices]]
wrapped_outputs += self.outputs[slices:]
profile = None
......@@ -927,11 +927,14 @@ class Scan(PureOp):
offset += 1
# 4. collecting slices where the output should be stored
# 4.1. Collect slices for mitmots
for idx in xrange(self.n_mit_mot_outs):
output_storage[idx].storage[0] = None
# 4.2. Collect slices for mitsots, sitsots and nitsots
offset = self.n_mit_mot_outs
if i != 0 and self.n_nit_sot > 0:
if i != 0:
for idx in xrange(self.n_outs + self.n_nit_sot -
self.n_mit_mot):
if (store_steps[idx + self.n_mit_mot] == 1 or
......@@ -946,15 +949,24 @@ class Scan(PureOp):
self.n_mit_mot):
output_storage[idx + offset].storage[0] = None
# 4.3. Collect slices for shared outputs
offset += self.n_outs + self.n_nit_sot - self.n_mit_mot
for idx in xrange(self.n_shared_outs):
output_storage[idx + offset].storage[0] = None
# If condition add it to the mix
# 4.4. If there is a condition add it to the mix
if self.as_while:
pdx = offset + self.n_shared_outs
output_storage[pdx].storage[0] = None
# 4.5. Keep a reference to the variables currently in the
# output_storage to be able to compare them with the actual
# outputs of the inner function after its execution
old_output_storage = [o.storage[0] for o in output_storage]
# 5. compute outputs
t0_fn = time.time()
try:
fn()
except Exception:
......@@ -974,11 +986,18 @@ class Scan(PureOp):
else:
# old-style linkers raise their own exceptions
raise
dt_fn = time.time() - t0_fn
if self.as_while:
pdx = offset + self.n_shared_outs
cond = output_storage[pdx].storage[0] == 0
# Check which of the pre-allocated outputs (if applicable) have
# been reused by the inner function
output_reused = [old_output_storage[o] is
output_storage[o].storage[0]
for o in range(len(output_storage))]
t_fn += dt_fn
offset_out = 0
# 5.1 Copy over the values for mit_mot outputs
......@@ -995,8 +1014,7 @@ class Scan(PureOp):
for j in xrange(begin, end):
if (store_steps[j] == 1 or self.vector_outs[j] or
outs[j][0][pos[j]] is not
output_storage[offset_out + j].storage[0]):
not output_reused[offset_out + j]):
outs[j][0][pos[j]] = \
output_storage[offset_out + j].storage[0]
......@@ -1020,8 +1038,7 @@ class Scan(PureOp):
outs[j][0] = outs[j][0][:store_steps[j]]
outs[j][0][pos[j]] = output_storage[jout].storage[0]
elif (store_steps[j] == 1 or self.vector_outs[j] or
outs[j][0][pos[j]] is not
output_storage[j + offset_out].storage[0]):
not output_reused[offset_out + j]):
outs[j][0][pos[j]] = \
output_storage[j + offset_out].storage[0]
......
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论