提交 65f54e68 authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #2479 from carriepl/prevent_output_from_inplace

Feature for no outputs from inplace.
...@@ -159,6 +159,18 @@ class AddDestroyHandler(gof.Optimizer): ...@@ -159,6 +159,18 @@ class AddDestroyHandler(gof.Optimizer):
fgraph.attach_feature(gof.DestroyHandler()) fgraph.attach_feature(gof.DestroyHandler())
class AddNoOutputFromInplace(gof.Optimizer):
    """Optimizer whose only job is to attach `gof.NoOutputFromInplace`.

    The attached feature prevents the outputs of the fgraph from being
    produced by inplace operations on intermediary variables. This is
    useful when the outputs of the fgraph are preallocated, since it
    avoids useless copies of the data. Currently, scan preallocates its
    outputs.
    """

    def add_requirements(self, fgraph):
        """Attach the `NoOutputFromInplace` feature to `fgraph`.

        The parent class requirements are registered first, then the
        feature is created and attached.
        """
        super(AddNoOutputFromInplace, self).add_requirements(fgraph)
        feature = gof.NoOutputFromInplace()
        fgraph.attach_feature(feature)
class PrintCurrentFunctionGraph(gof.Optimizer): class PrintCurrentFunctionGraph(gof.Optimizer):
"""This optimizer is for debugging. """This optimizer is for debugging.
...@@ -211,6 +223,9 @@ optdb.register('specialize_device', gof.EquilibriumDB(), ...@@ -211,6 +223,9 @@ optdb.register('specialize_device', gof.EquilibriumDB(),
optdb.register('merge2', gof.MergeOptimizer(), optdb.register('merge2', gof.MergeOptimizer(),
49, 'fast_run', 'merge') 49, 'fast_run', 'merge')
optdb.register('add_no_output_from_inplace', AddNoOutputFromInplace(),
49.4)
optdb.register('add_destroy_handler', AddDestroyHandler(), optdb.register('add_destroy_handler', AddDestroyHandler(),
49.5, 'fast_run', 'inplace') 49.5, 'fast_run', 'inplace')
......
import theano
from theano.compile.mode import Mode
import theano.tensor as T
def test_no_output_from_implace():
    """Check that 'add_no_output_from_inplace' stops inplace graph outputs.

    The same graph, tanh(dot(x, y)), is compiled twice: once with plain
    FAST_RUN and once with a mode that additionally includes the
    'add_no_output_from_inplace' optimization. The op producing the
    output is then inspected for a `destroy_map` entry on output 0.
    """
    x = T.matrix()
    y = T.matrix()
    out = T.tanh(T.dot(x, y))

    def output_op(fct):
        # Op of the apply node that computes the first output of the
        # compiled function's graph.
        return fct.maker.fgraph.outputs[0].owner.op

    # Without the optimization, the elemwise op that produces the output
    # is expected to work inplace.
    plain_fct = theano.function([x, y], out, mode="FAST_RUN")
    plain_op = output_op(plain_fct)
    assert (hasattr(plain_op, 'destroy_map') and 0 in plain_op.destroy_map)

    # With the optimization included, the elemwise op that produces the
    # output must not work inplace.
    restricted_mode = Mode(linker="cvm", optimizer="fast_run").including(
        "add_no_output_from_inplace")
    restricted_fct = theano.function([x, y], out, mode=restricted_mode)
    restricted_op = output_op(restricted_fct)
    assert (not hasattr(restricted_op, 'destroy_map') or
            0 not in restricted_op.destroy_map)
...@@ -74,7 +74,7 @@ from theano.gof.optdb import \ ...@@ -74,7 +74,7 @@ from theano.gof.optdb import \
from theano.gof.toolbox import \ from theano.gof.toolbox import \
Feature, \ Feature, \
Bookkeeper, History, Validator, ReplaceValidate, NodeFinder,\ Bookkeeper, History, Validator, ReplaceValidate, NodeFinder,\
PrintListener, ReplacementDidntRemovedError PrintListener, ReplacementDidntRemovedError, NoOutputFromInplace
from theano.gof.type import \ from theano.gof.type import \
Type, Generic, generic Type, Generic, generic
......
import sys import sys
import time import time
import theano
from theano import config from theano import config
from theano.gof.python25 import partial from theano.gof.python25 import partial
from theano.gof.python25 import OrderedDict from theano.gof.python25 import OrderedDict
...@@ -394,3 +395,26 @@ class PreserveNames(Feature): ...@@ -394,3 +395,26 @@ class PreserveNames(Feature):
new_r.name = r.name new_r.name = r.name
class NoOutputFromInplace(Feature):
    """Feature that rejects graphs whose outputs are computed inplace.

    During validation, every output of the function graph is inspected
    and the graph is declared inconsistent if the op that produces an
    output lists that output in its `destroy_map`.
    """

    def validate(self, fgraph):
        """Raise if an output of `fgraph` is produced by an inplace op.

        :param fgraph: the function graph to check. Its `outputs` are
            inspected; graph inputs/constants (no owner) are skipped.

        :return: True when `fgraph` has no `destroyers` attribute (the
            check is skipped entirely); None when every output passes.

        :raises theano.gof.InconsistencyError: if the op producing an
            output has a `destroy_map` entry for that output's index.
        """
        # The check is only performed on graphs exposing `destroyers`.
        if not hasattr(fgraph, 'destroyers'):
            return True

        for out in list(fgraph.outputs):
            node = out.owner
            if node is None:
                # Not produced by any apply node: nothing to validate.
                continue

            # Validate that the node that produces the output does not
            # produce it by modifying something else inplace.
            op = node.op
            out_idx = node.outputs.index(out)
            if hasattr(op, 'destroy_map') and out_idx in op.destroy_map:
                # Build ONE message string. Passing the fragments as
                # separate positional arguments would store them as a
                # tuple on the exception instead of a readable message.
                raise theano.gof.InconsistencyError(
                    "A function graph Feature has requested (probably for "
                    "efficiency reasons for scan) that outputs of the graph "
                    "be prevented from being the result of inplace "
                    "operations. This has prevented output %s from "
                    "being computed by modifying another variable "
                    "inplace." % (out,))
...@@ -90,7 +90,7 @@ def test_consistency_randomstreams(): ...@@ -90,7 +90,7 @@ def test_consistency_randomstreams():
for use_cuda in test_use_cuda: for use_cuda in test_use_cuda:
#print 'use_cuda =', use_cuda #print 'use_cuda =', use_cuda
samples = [] samples = []
rng = MRG_RandomStreams(seed=seed, use_cuda=False) rng = MRG_RandomStreams(seed=seed, use_cuda=use_cuda)
for i in range(n_streams): for i in range(n_streams):
stream_samples = [] stream_samples = []
u = rng.uniform(size=(n_substreams,), nstreams=n_substreams) u = rng.uniform(size=(n_substreams,), nstreams=n_substreams)
......
diff --git a/theano/scan_module/scan_perform.c b/theano/scan_module/scan_perform.c @@ -5597,7 +5597,7 @@
index aaebb43..2d06b29 100644
--- a/theano/scan_module/scan_perform.c
+++ b/theano/scan_module/scan_perform.c
@@ -5595,7 +5595,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P
* cdef list stack * cdef list stack
* cdef int offset * cdef int offset
*/ */
...@@ -11,29 +7,29 @@ index aaebb43..2d06b29 100644 ...@@ -11,29 +7,29 @@ index aaebb43..2d06b29 100644
__Pyx_INCREF(__pyx_t_4); __Pyx_INCREF(__pyx_t_4);
__pyx_v_descr = ((PyArray_Descr *)__pyx_t_4); __pyx_v_descr = ((PyArray_Descr *)__pyx_t_4);
__pyx_t_4 = 0; __pyx_t_4 = 0;
@@ -7147,7 +7147,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a @@ -7126,7 +7126,7 @@
* arr.base = baseptr * arr.base = baseptr
* *
*/ */
- Py_XDECREF(__pyx_v_arr->base); - Py_XDECREF(__pyx_v_arr->base);
+ Py_XDECREF(PyArray_BASE(__pyx_v_arr)); + Py_XDECREF(PyArray_BASE(__pyx_v_arr));
/* "/home/anakha/.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":974 /* "numpy.pxd":973
* baseptr = <PyObject*>base * baseptr = <PyObject*>base
@@ -7156,7 +7156,11 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a @@ -7135,7 +7135,11 @@
* *
* cdef inline object get_array_base(ndarray arr): * cdef inline object get_array_base(ndarray arr):
*/ */
- __pyx_v_arr->base = __pyx_v_baseptr; - __pyx_v_arr->base = __pyx_v_baseptr;
+#if NPY_API_VERSION < 0x00000007 + #if NPY_API_VERSION < 0x00000007
+ PyArray_BASE(__pyx_v_arr) = __pyx_v_baseptr; + PyArray_BASE(__pyx_v_arr) = __pyx_v_baseptr;
+#else + #else
+ PyArray_SetBaseObject(__pyx_v_arr, __pyx_v_baseptr); + PyArray_SetBaseObject(__pyx_v_arr, __pyx_v_baseptr);
+#endif + #endif
/* "/home/anakha/.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":966 __Pyx_RefNannyFinishContext();
* }
@@ -7191,7 +7195,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py @@ -7161,7 +7165,7 @@
* return None * return None
* else: * else:
*/ */
...@@ -41,8 +37,8 @@ index aaebb43..2d06b29 100644 ...@@ -41,8 +37,8 @@ index aaebb43..2d06b29 100644
+ __pyx_t_1 = ((PyArray_BASE(__pyx_v_arr) == NULL) != 0); + __pyx_t_1 = ((PyArray_BASE(__pyx_v_arr) == NULL) != 0);
if (__pyx_t_1) { if (__pyx_t_1) {
/* "/home/anakha/.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":978 /* "numpy.pxd":977
@@ -7214,8 +7218,8 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py @@ -7185,8 +7189,8 @@
* return <object>arr.base # <<<<<<<<<<<<<< * return <object>arr.base # <<<<<<<<<<<<<<
*/ */
__Pyx_XDECREF(__pyx_r); __Pyx_XDECREF(__pyx_r);
...@@ -52,4 +48,4 @@ index aaebb43..2d06b29 100644 ...@@ -52,4 +48,4 @@ index aaebb43..2d06b29 100644
+ __pyx_r = ((PyObject *)PyArray_BASE(__pyx_v_arr)); + __pyx_r = ((PyObject *)PyArray_BASE(__pyx_v_arr));
goto __pyx_L0; goto __pyx_L0;
} }
__pyx_L3:;
...@@ -535,7 +535,7 @@ class Scan(PureOp): ...@@ -535,7 +535,7 @@ class Scan(PureOp):
self.n_sit_sot + self.n_sit_sot +
self.n_nit_sot) self.n_nit_sot)
wrapped_inputs = [Param(x, borrow=True) for x in self.inputs] wrapped_inputs = [Param(x, borrow=True) for x in self.inputs]
wrapped_outputs = [Out(x, borrow=False) for x in wrapped_outputs = [Out(x, borrow=(x not in self.inputs)) for x in
self.outputs[:slices]] self.outputs[:slices]]
wrapped_outputs += self.outputs[slices:] wrapped_outputs += self.outputs[slices:]
profile = None profile = None
...@@ -927,11 +927,14 @@ class Scan(PureOp): ...@@ -927,11 +927,14 @@ class Scan(PureOp):
offset += 1 offset += 1
# 4. collecting slices where the output should be stored # 4. collecting slices where the output should be stored
# 4.1. Collect slices for mitmots
for idx in xrange(self.n_mit_mot_outs): for idx in xrange(self.n_mit_mot_outs):
output_storage[idx].storage[0] = None output_storage[idx].storage[0] = None
# 4.2. Collect slices for mitsots, sitsots and nitsots
offset = self.n_mit_mot_outs offset = self.n_mit_mot_outs
if i != 0 and self.n_nit_sot > 0: if i != 0:
for idx in xrange(self.n_outs + self.n_nit_sot - for idx in xrange(self.n_outs + self.n_nit_sot -
self.n_mit_mot): self.n_mit_mot):
if (store_steps[idx + self.n_mit_mot] == 1 or if (store_steps[idx + self.n_mit_mot] == 1 or
...@@ -946,15 +949,24 @@ class Scan(PureOp): ...@@ -946,15 +949,24 @@ class Scan(PureOp):
self.n_mit_mot): self.n_mit_mot):
output_storage[idx + offset].storage[0] = None output_storage[idx + offset].storage[0] = None
# 4.3. Collect slices for shared outputs
offset += self.n_outs + self.n_nit_sot - self.n_mit_mot offset += self.n_outs + self.n_nit_sot - self.n_mit_mot
for idx in xrange(self.n_shared_outs): for idx in xrange(self.n_shared_outs):
output_storage[idx + offset].storage[0] = None output_storage[idx + offset].storage[0] = None
# If condition add it to the mix
# 4.4. If there is a condition add it to the mix
if self.as_while: if self.as_while:
pdx = offset + self.n_shared_outs pdx = offset + self.n_shared_outs
output_storage[pdx].storage[0] = None output_storage[pdx].storage[0] = None
# 4.5. Keep a reference to the variables currently in the
# output_storage to be able to compare them with the actual
# outputs of the inner function after its execution
old_output_storage = [o.storage[0] for o in output_storage]
# 5. compute outputs # 5. compute outputs
t0_fn = time.time() t0_fn = time.time()
try: try:
fn() fn()
except Exception: except Exception:
...@@ -974,11 +986,18 @@ class Scan(PureOp): ...@@ -974,11 +986,18 @@ class Scan(PureOp):
else: else:
# old-style linkers raise their own exceptions # old-style linkers raise their own exceptions
raise raise
dt_fn = time.time() - t0_fn dt_fn = time.time() - t0_fn
if self.as_while: if self.as_while:
pdx = offset + self.n_shared_outs pdx = offset + self.n_shared_outs
cond = output_storage[pdx].storage[0] == 0 cond = output_storage[pdx].storage[0] == 0
# Check which of the pre-allocated outputs (if applicable) have
# been reused by the inner function
output_reused = [old_output_storage[o] is
output_storage[o].storage[0]
for o in range(len(output_storage))]
t_fn += dt_fn t_fn += dt_fn
offset_out = 0 offset_out = 0
# 5.1 Copy over the values for mit_mot outputs # 5.1 Copy over the values for mit_mot outputs
...@@ -995,8 +1014,7 @@ class Scan(PureOp): ...@@ -995,8 +1014,7 @@ class Scan(PureOp):
for j in xrange(begin, end): for j in xrange(begin, end):
if (store_steps[j] == 1 or self.vector_outs[j] or if (store_steps[j] == 1 or self.vector_outs[j] or
outs[j][0][pos[j]] is not not output_reused[offset_out + j]):
output_storage[offset_out + j].storage[0]):
outs[j][0][pos[j]] = \ outs[j][0][pos[j]] = \
output_storage[offset_out + j].storage[0] output_storage[offset_out + j].storage[0]
...@@ -1020,8 +1038,7 @@ class Scan(PureOp): ...@@ -1020,8 +1038,7 @@ class Scan(PureOp):
outs[j][0] = outs[j][0][:store_steps[j]] outs[j][0] = outs[j][0][:store_steps[j]]
outs[j][0][pos[j]] = output_storage[jout].storage[0] outs[j][0][pos[j]] = output_storage[jout].storage[0]
elif (store_steps[j] == 1 or self.vector_outs[j] or elif (store_steps[j] == 1 or self.vector_outs[j] or
outs[j][0][pos[j]] is not not output_reused[offset_out + j]):
output_storage[j + offset_out].storage[0]):
outs[j][0][pos[j]] = \ outs[j][0][pos[j]] = \
output_storage[j + offset_out].storage[0] output_storage[j + offset_out].storage[0]
......
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论