提交 cd2b4a31 authored 作者: James Bergstra's avatar James Bergstra

adding lazy cond op as IfElse Op

* * * new op: Cond * * * gpu lazycond optimization
上级 9433d5d2
......@@ -11,8 +11,6 @@ How should you write your algorithm to make the most of what Theano can do?
Limitations
-----------
- Conditional control flow is possible but currently not efficient. The current implementation will evaluate both sides of an ``if`` construct (see :func:`tensor.switch`).
- While- or for-Loops within an expression graph are supported, but only via
the :func:`theano.scan` op (which puts restrictions on how the loop body can
interact with the rest of the graph).
......
"""Provides `DebugMode`, an evaluation mode for debugging theano internals.
:TODO: add support for Cond Op, LazyLinker, PureOp, etc.
:TODO: add support for IfElse Op, LazyLinker, PureOp, etc.
"""
__docformat__ = "restructuredtext en"
......
from copy import deepcopy
import numpy
from theano.gof.op import PureOp
from theano.gof import Apply, generic, Container
from theano.gof.link import LocalLinker, map_storage, add_clear_storage
from theano import function, Mode
from theano.lazycond import ifelse
import theano.tensor as T
class IfElseIfElseIf(PureOp):
    """Lazy four-way conditional Op (if / elif / elif / else).

    ``IfElseIfElseIf()(c1, t1, c2, t2, c3, t3, f3)`` evaluates like::

        if c1:   t1
        elif c2: t2
        elif c3: t3
        else:    f3

    Only the branch that is actually selected gets computed, via the lazy
    thunk protocol (see `make_thunk`).  All four result branches must have
    the same Theano type.
    """
    def __init__(self, inplace=False):
        # check destroyhandler and others to ensure that a view_map with
        # multiple inputs can work
        self.inplace=inplace
        # in-place (view) operation is not supported yet
        assert not self.inplace

    def make_node(self, c1, t1, c2,t2,c3,t3,f3):
        # The three asserts together force t1, t2, t3 and f3 to share one
        # type; the output variable takes that common type.
        assert t1.type == f3.type
        assert t2.type == t3.type
        assert t3.type == f3.type
        return Apply(self, [c1,t1,c2,t2,c3,t3,f3], [t1.type()])

    def make_thunk(self, node, storage_map, compute_map, no_recycling):
        # Cache the per-variable "computed?" flags and value cells so the
        # thunk body does no dict lookups.
        input_computed = [compute_map[v] for v in node.inputs]
        output_computed = [compute_map[v] for v in node.outputs]
        input_registers = [storage_map[v] for v in node.inputs]
        output_registers = [storage_map[v] for v in node.outputs]
        outtype = node.outputs[0].type

        def thunk():
            # Lazy protocol: returning [i] asks the VM to compute input i
            # and call this thunk again; returning [] means the output is
            # ready.  Input order: [c1, t1, c2, t2, c3, t3, f3].
            if not input_computed[0][0]:
                return [0]
            else:
                truthval = input_registers[0][0]
                if truthval:
                    if not input_computed[1][0]:
                        return [1]
                    else:
                        output_computed[0][0]=1
                        # deepcopy: the output must not alias the input
                        output_registers[0][0]=outtype.filter(deepcopy(input_registers[1][0]))
                        return []
                else:
                    if not input_computed[2][0]:
                        return [2]
                    else:
                        truthval = input_registers[2][0]
                        if truthval:
                            if not input_computed[3][0]:
                                return [3]
                            else:
                                output_computed[0][0] = 1
                                output_registers[0][0] = outtype.filter(deepcopy(input_registers[3][0]))
                                return []
                        else:
                            if not input_computed[4][0]:
                                return [4]
                            else:
                                truthval = input_registers[4][0]
                                if truthval:
                                    if not input_computed[5][0]:
                                        return [5]
                                    else:
                                        output_computed[0][0] = 1
                                        output_registers[0][0] = outtype.filter(deepcopy(input_registers[5][0]))
                                        return []
                                else:
                                    if not input_computed[6][0]:
                                        return [6]
                                    else:
                                        output_computed[0][0] = 1
                                        output_registers[0][0] = outtype.filter(deepcopy(input_registers[6][0]))
                                        return []
        # mark as lazy so only the VM-based linkers run it correctly
        thunk.lazy = True
        return thunk
class NotImplementedOp(PureOp):
    """Op whose thunk always raises.

    Used in tests to prove that a lazily-skipped branch was truly never
    evaluated: if the branch runs, `E` is raised.
    """

    class E(Exception):
        # raised whenever the op's thunk is actually executed
        pass

    def make_node(self, x):
        # the (never-produced) output mirrors the input's type
        return Apply(self, [x], [x.type()])

    def make_thunk(self, node, storage_map, compute_map, no_recycling):
        def fail():
            raise self.E()
        fail.lazy=False
        return fail
def test_ifelse():
    """ifelse must evaluate only the branch selected by the condition."""
    a, b, c = generic(), generic(), generic()
    notimpl = NotImplementedOp()
    f = function([a, b, c], ifelse(a, notimpl(b), c),
                 mode=Mode(linker='vm', optimizer='fast_run'))

    # Case 1: taking the true branch must actually run notimpl(b),
    # which raises.
    print("case 1")
    try:
        f(True, 'a', 'b')
        assert False
    except NotImplementedOp.E:
        pass
    print("... passed")

    # Case 2: taking the false branch must skip notimpl entirely and
    # return the false value unchanged.
    print("case 2")
    print(f(False, 'a', 'b'))
    assert f(False, 'a', 'b') == 'b'
    print("... passed")
def more_complex_test():
    """Exercise nested lazy ifelse graphs plus the 4-way IfElseIfElseIf."""
    notimpl = NotImplementedOp()
    multi_if = IfElseIfElseIf()

    x1, x2 = T.scalar('x1'), T.scalar('x2')
    c1, c2 = generic('c1'), generic('c2')

    # Build a nest of lazy conditionals; notimpl guards the branch that
    # must never be evaluated when c1 is true.
    t1 = ifelse(c1, x1, notimpl(x2))
    t1.name = 't1'
    t2 = t1 * 10
    t2.name = 't2'
    t3 = ifelse(c2, t2, x1 + t1)
    t3.name = 't3'
    t4 = multi_if(T.eq(x1, x2), x1, T.eq(x1, 5), x2, c2, t3, t3 + 0.5)
    t4.name = 't4'

    f = function([c1, c2, x1, x2], t4,
                 mode=Mode(linker='vm', optimizer='fast_run'))

    # c1=1, c2=0, x1=10, x2=0: t1=10, t3=x1+t1=20, last elif (c2) is
    # false so the final else branch t3+0.5 = 20.5 is selected.
    print(f(1, 0, numpy.array(10, dtype=x1.dtype), 0))
    assert f(1, 0, numpy.array(10, dtype=x1.dtype), 0) == 20.5
    print('... passed')
# Run the heavier integration test when executed as a script.
if __name__ == '__main__':
    more_complex_test()
import gc
import sys
import time
import line_profiler
try:
import line_profiler
except ImportError:
pass
import numpy
from theano import function
......@@ -9,7 +12,7 @@ from theano.gof import vm,link, OpWiseCLinker
from theano.compile import Mode
from theano import tensor
from theano.lazycond import cond
from theano.lazycond import ifelse
import theano
def test_speed():
......@@ -91,7 +94,7 @@ def test_speed_lazy():
def build_graph(x, depth=5):
    """Return a chain of `depth` nested lazy conditionals over *x*.

    Each level computes ``z = ifelse(z > 0, -z, z)`` (flip the sign of a
    positive value, keep a non-positive one).
    """
    z = x
    for d in range(depth):
        # note: exactly one conditional per level; the stale duplicate
        # assignment using the removed `cond` alias was dropped
        z = ifelse(z > 0, -z, z)
    return z
def time_linker(name, linker):
......@@ -105,12 +108,12 @@ def test_speed_lazy():
f_a = function([x], a,
mode=Mode(optimizer=None,
linker=linker()),
#profile='f_a lazy cond %s'%name,
#profile='f_a lazy ifelse %s'%name,
)
f_b = function([x], b,
mode=Mode(optimizer=None,
linker=linker()),
#profile='f_b lazy cond %s'%name,
#profile='f_b lazy ifelse %s'%name,
)
print f_a([2.0])
......@@ -186,7 +189,7 @@ if run_memory_usage_tests:
def build_graph(x, depth=5):
    """Return a chain of `depth` nested lazy conditionals over *x*.

    Each level computes ``z = ifelse(z > 0, -z, z)`` (flip the sign of a
    positive value, keep a non-positive one).
    """
    z = x
    for d in range(depth):
        # note: exactly one conditional per level; the stale duplicate
        # assignment using the removed `cond` alias was dropped
        z = ifelse(z > 0, -z, z)
    return z
def time_linker(name, linker):
......
"""
IfElse is an Op that works with the LazyLinker to support conditional graph evaluation.
:TODO: Add text to library documentation describing the IfElse Op.
"""
from copy import deepcopy
from theano.gof import PureOp, Apply, generic, Container
import theano.tensor
import gof
from compile import optdb
from tensor import opt
@gof.local_optimizer([None])
def ifelse_make_inplace(node):
    """Rewrite a copying IfElse node into an in-place (view) IfElse."""
    op = node.op
    # only rewrite IfElse nodes that do not already view their input
    if not (isinstance(op, IfElse) and not op.as_view):
        return False
    print('ifelse_make_inplace applied')
    inplace_op = IfElse(as_view=True, gpu=op.gpu, name=op.name)
    return inplace_op.make_node(*node.inputs).outputs
# Register the view rewrite late (priority 95) under the 'fast_run' and
# 'inplace' tags, after the main graph optimizations have run.
optdb.register('ifelse_make_inplace', opt.in2out(ifelse_make_inplace,
    ignore_newtrees=True), 95, 'fast_run', 'inplace')
class IfElse(PureOp):
    """
    Op that works with LazyLinker to support conditional graph evaluation.

    Example usage:

        ``rval = ifelse(tf, rval_if_true, rval_if_false)``

    Only the branch selected by the runtime truth value of ``tf`` is
    computed; the other branch's subgraph is skipped entirely.

    :param as_view: output aliases ("views") the true-branch input instead
        of copying it (the false branch is still copied).
    :param gpu: marks instances produced by the GPU optimization.
    :param name: optional label (used e.g. by pydotprint's cond_highlight).

    :note:
        Other Linkers (ALL other linkers right now) are INCOMPATIBLE with this
        Op, they will produce functions that FAIL TO EXECUTE.
    """
    def __init__(self, as_view=False, gpu = False, name = None):
        if as_view:
            # check destroyhandler and others to ensure that a view_map with
            # multiple inputs can work
            view_map = {}
            view_map[0] = [1]  # output 0 views input 1 (the true branch)
            self.view_map = view_map
            #raise NotImplementedError('IfElse must copy for now')
        self.as_view=as_view
        self.gpu = gpu
        self.name = name

    def make_node(self, c, t, f):
        # Both branches must share one type; the output takes that type.
        if t.type != f.type:
            raise TypeError(
                'IfElse requires same types for true and false args',
                (t.type, f.type))
        return Apply(self, [c,t,f], [t.type()])

    def make_thunk(self, node, storage_map, compute_map, no_recycling):
        outtype = node.outputs[0].type
        c,t,f = node.inputs
        output = node.outputs[0]

        def thunk():
            # Lazy protocol: returning [i] asks the VM to compute input i
            # ([c, t, f] order) and re-call this thunk; [] means done.
            if not compute_map[c][0]:
                return [0]
            else:
                truthval = storage_map[c][0]
                if truthval:
                    if not compute_map[t][0]:
                        return [1]
                    else:
                        compute_map[output][0]=1
                        if self.as_view:
                            # view: share storage with the true input
                            oval = outtype.filter(storage_map[t][0])
                        else:
                            # copy so the output never aliases the input
                            oval = outtype.filter(
                                    deepcopy(storage_map[t][0]))
                        storage_map[output][0] = oval
                        return []
                else:
                    if not compute_map[f][0]:
                        return [2]
                    else:
                        # can't view both outputs unless destroyhandler
                        # improves
                        compute_map[output][0]=1
                        oval = outtype.filter(
                                deepcopy(storage_map[f][0]))
                        storage_map[output][0]=oval
                        return []
        thunk.lazy = True
        thunk.inputs = [storage_map[v] for v in node.inputs]
        thunk.outputs = [storage_map[v] for v in node.outputs]
        return thunk

# Default (copying, CPU) instance exported as the public construction
# helper: ``ifelse(condition, a, b)``.
ifelse = IfElse()
......@@ -391,7 +391,7 @@ default_colorCodes = {'GpuFromHost' : 'red',
'HostFromGpu' : 'red',
'Scan' : 'yellow',
'Shape' : 'cyan',
'Cond' : 'magenta',
'IfElse' : 'magenta',
'Elemwise': '#FFAABB',
'Subtensor': '#FFAAFF'}
......@@ -473,10 +473,10 @@ def pydotprint(fct, outfile=None,
c3 = pd.Cluster('Middle')
cond = None
for node in fct_env.toposort():
if node.op.__class__.__name__=='Cond' and node.op.name == cond_highlight:
if node.op.__class__.__name__=='IfElse' and node.op.name == cond_highlight:
cond = node
if cond is None:
_warn("pydotprint: cond_highlight is set but there is no Cond node in the graph")
_warn("pydotprint: cond_highlight is set but there is no IfElse node in the graph")
cond_highlight = None
if cond_highlight is not None:
......
......@@ -270,6 +270,48 @@ def local_gpu_dot_to_dot22(node):
shape_out))]
return False
@register_opt()
@local_optimizer([])
def local_gpu_lazy_ifelse(node):
    """
    Move a lazy IfElse onto the GPU when its data is already there:

    gpu_from_host(ifelse(c, t, f))
        -> gpu_ifelse(c, gpu_from_host(t), gpu_from_host(f))
    ifelse(c, ..host_from_gpu(x)..)
        -> host_from_gpu(gpu_ifelse(c, gpu_from_host(t), gpu_from_host(f)))
    """
    # local import: theano.lazycond may be absent; skip the rewrite then
    import theano
    if hasattr(theano,"lazycond"):
        gpu_ifelse = theano.lazycond.IfElse(gpu = True)
        if node.op == gpu_from_host:
            # Case 1: the ifelse result is being shipped to the GPU --
            # run the ifelse itself on the GPU instead.
            host_input = node.inputs[0]
            if (host_input.owner
                and host_input.owner.op == theano.lazycond.ifelse):
                c, t, f = host_input.owner.inputs
                # both branches must live on the GPU...
                if not isinstance(f.type,CudaNdarrayType):
                    f = gpu_from_host(f)
                if not isinstance(t.type,CudaNdarrayType):
                    t = gpu_from_host(t)
                # ...while the condition stays on the host
                if isinstance(c.type,CudaNdarrayType):
                    c = host_from_gpu(c)
                return [gpu_ifelse(c, t, f)]
        if node.op == theano.lazycond.ifelse:
            # Case 2: some ifelse input already comes from the GPU --
            # evaluate on the GPU and transfer only the result back.
            if numpy.any([(i.owner and i.owner.op == host_from_gpu) for i in node.inputs]):
                c, t, f = node.inputs
                if not isinstance(f.type,CudaNdarrayType):
                    f = gpu_from_host(f)
                if not isinstance(t.type,CudaNdarrayType):
                    t = gpu_from_host(t)
                if isinstance(c.type,CudaNdarrayType):
                    c = host_from_gpu(c)
                return [host_from_gpu(gpu_ifelse(c, t, f))]
    return False
@register_opt()
@local_optimizer([])
......
......@@ -567,7 +567,7 @@ class ScanMerge(gof.Optimizer):
def apply(self, env):
nodelist = list(env.toposort())
cond_nodes = [ x for x in nodelist if x.op.__class__.__name__=='Cond']
cond_nodes = [ x for x in nodelist if x.op.__class__.__name__=='IfElse']
scan_nodes = [ x for x in nodelist if x.op.__class__.__name__=='Scan']
# Having lazy ifs in the graph complicates a bit things, and for
......
......@@ -57,7 +57,7 @@ def test_gc():
#because temporaries that weren't collected shouldn't be pickled anyway
len_post_f = len(post_f)
len_post_g = len(post_g)
assert len_post_f == len_post_g
assert len_post_f == len_post_g, (f_linker, len_post_f, len_post_g)
def test_merge_opt_runtime():
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论