提交 94519309 authored 作者: Olivier Delalleau's avatar Olivier Delalleau

Merged

...@@ -393,7 +393,7 @@ default_colorCodes = {'GpuFromHost' : 'red', ...@@ -393,7 +393,7 @@ default_colorCodes = {'GpuFromHost' : 'red',
def pydotprint(fct, outfile=None, def pydotprint(fct, outfile=None,
compact=True, format='png', with_ids=False, compact=True, format='png', with_ids=False,
high_contrast=False, cond_highlight = None, colorCodes = None, high_contrast=False, cond_highlight = None, colorCodes = None,
max_label_size=50): max_label_size=50, scan_graphs = False):
""" """
print to a file in png format the graph of op of a compile theano fct. print to a file in png format the graph of op of a compile theano fct.
...@@ -401,6 +401,8 @@ def pydotprint(fct, outfile=None, ...@@ -401,6 +401,8 @@ def pydotprint(fct, outfile=None,
:param outfile: the output file where to put the graph. :param outfile: the output file where to put the graph.
:param compact: if True, will remove intermediate var that don't have name. :param compact: if True, will remove intermediate var that don't have name.
:param format: the file format of the output. :param format: the file format of the output.
:param with_ids: Print the toposort index of the node in the node name.
and an index number in the variable ellipse.
:param high_contrast: if true, the color that describes the respective :param high_contrast: if true, the color that describes the respective
node is filled with its corresponding color, instead of coloring node is filled with its corresponding color, instead of coloring
the border the border
...@@ -412,6 +414,11 @@ def pydotprint(fct, outfile=None, ...@@ -412,6 +414,11 @@ def pydotprint(fct, outfile=None,
right branch, ops that are on both branches right branch, ops that are on both branches
As an alternative you can provide the node that represents As an alternative you can provide the node that represents
the lazy if the lazy if
:param scan_graphs: if true it will plot the inner graph of each scan op
in files with the same name as the name given for the main
file to which the name of the scan op is concatenated and
the index in the toposort of the scan.
This index can be printed in the graph with the option with_ids.
In the graph, box are an Apply Node(the execution of an op) and ellipse are variable. In the graph, box are an Apply Node(the execution of an op) and ellipse are variable.
If variable have name they are used as the text(if multiple var have the same name, they will be merged in the graph). If variable have name they are used as the text(if multiple var have the same name, they will be merged in the graph).
...@@ -428,7 +435,6 @@ def pydotprint(fct, outfile=None, ...@@ -428,7 +435,6 @@ def pydotprint(fct, outfile=None,
if colorCodes is None: if colorCodes is None:
colorCodes = default_colorCodes colorCodes = default_colorCodes
if outfile is None: if outfile is None:
outfile = os.path.join(config.compiledir,'theano.pydotprint.' + outfile = os.path.join(config.compiledir,'theano.pydotprint.' +
config.device + '.' + format) config.device + '.' + format)
...@@ -499,8 +505,10 @@ def pydotprint(fct, outfile=None, ...@@ -499,8 +505,10 @@ def pydotprint(fct, outfile=None,
#a var id is needed as otherwise var with the same type will be merged in the graph. #a var id is needed as otherwise var with the same type will be merged in the graph.
varstr = str(var.type) varstr = str(var.type)
if (varstr in all_strings) or with_ids: if (varstr in all_strings) or with_ids:
varstr += ' id=' + str(len(var_str)) idx = ' id=' + str(len(var_str))
if len(varstr) > max_label_size: if len(varstr)+len(idx) > max_label_size:
varstr = varstr[:max_label_size-3-len(idx)]+idx+'...'
elif len(varstr) > max_label_size:
varstr = varstr[:max_label_size-3]+'...' varstr = varstr[:max_label_size-3]+'...'
var_str[var]=varstr var_str[var]=varstr
all_strings.add(varstr) all_strings.add(varstr)
...@@ -523,11 +531,14 @@ def pydotprint(fct, outfile=None, ...@@ -523,11 +531,14 @@ def pydotprint(fct, outfile=None,
else: pf = time*100/mode.fct_call_time[fct] else: pf = time*100/mode.fct_call_time[fct]
prof_str=' (%.3fs,%.3f%%,%.3f%%)'%(time,pt,pf) prof_str=' (%.3fs,%.3f%%,%.3f%%)'%(time,pt,pf)
applystr = str(node.op).replace(':','_') applystr = str(node.op).replace(':','_')
if len(applystr)>max_label_size:
applystr = applystr[:max_label_size-3]+'...'
if (applystr in all_strings) or with_ids:
applystr = applystr+' id='+str(topo.index(node))
applystr += prof_str applystr += prof_str
if (applystr in all_strings) or with_ids:
idx = ' id='+str(topo.index(node))
if len(applystr)+len(idx) > max_label_size:
applystr = applystr[:max_label_size-3-len(idx)]+idx+'...'
elif len(applystr) > max_label_size:
applystr = applystr[:max_label_size-3]+'...'
all_strings.add(applystr) all_strings.add(applystr)
apply_name_cache[node] = applystr apply_name_cache[node] = applystr
return applystr return applystr
...@@ -626,6 +637,27 @@ def pydotprint(fct, outfile=None, ...@@ -626,6 +637,27 @@ def pydotprint(fct, outfile=None,
g.write(outfile, prog='dot', format=format) g.write(outfile, prog='dot', format=format)
print 'The output file is available at',outfile print 'The output file is available at',outfile
if scan_graphs:
scan_ops = [(idx, x) for idx,x in enumerate(fct_env.toposort()) if isinstance(x.op, theano.scan_module.scan_op.Scan)]
path, fn = os.path.split(outfile)
basename = '.'.join(fn.split('.')[:-1])
# Safe way of doing things .. a file name may contain multiple .
ext = fn[len(basename):]
for idx, scan_op in scan_ops:
# is there a chance that name is not defined?
if hasattr(scan_op.op,'name'):
new_name = basename+'_'+scan_op.op.name+'_'+str(idx)
else:
new_name = basename+'_'+str(idx)
new_name = os.path.join(path, new_name+ext)
pydotprint(scan_op.op.fn, new_name, compact, format, with_ids,
high_contrast, cond_highlight, colorCodes,
max_label_size, scan_graphs)
......
...@@ -367,8 +367,21 @@ class ScanSaveMem(gof.Optimizer): ...@@ -367,8 +367,21 @@ class ScanSaveMem(gof.Optimizer):
# If the memory for this output has been pre-allocated # If the memory for this output has been pre-allocated
# before going into the scan op (by an alloc node) # before going into the scan op (by an alloc node)
if idx < op.n_mit_sot + op.n_sit_sot: if idx < op.n_mit_sot + op.n_sit_sot:
_nw_input = nw_inputs[offset+idx].owner.inputs[1] # In case the input is still an alloc node
nw_input = scan_utils.expand( _nw_input, val - init_l[i] ) if nw_inputs[offset+idx].owner:
_nw_input = nw_inputs[offset+idx].owner.inputs[1]
nw_input = scan_utils.expand( _nw_input, val - init_l[i] )
# Else, if it was constant folded to a single value
elif isinstance(nw_inputs[offset+idx], tensor.Constant):
# The hope is that constant folding will fold
# this as well
nw_input = nw_inputs[offset+idx][:val]
else:
raise Exception(('Unforseen case. Please report'
' to theano-dev with an example'
' script for this case to be'
' debuged'))
nw_inputs[offset+idx] = nw_input nw_inputs[offset+idx] = nw_input
replaced_outs.append(op.n_mit_mot + idx) replaced_outs.append(op.n_mit_mot + idx)
odx = op.n_mit_mot + idx odx = op.n_mit_mot + idx
......
...@@ -6,6 +6,8 @@ import theano ...@@ -6,6 +6,8 @@ import theano
import theano.sandbox.rng_mrg import theano.sandbox.rng_mrg
from theano import tensor from theano import tensor
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
from theano.compile.pfunc import rebuild_collect_shared
''' '''
Questions and notes about scan that should be answered : Questions and notes about scan that should be answered :
...@@ -162,7 +164,29 @@ def scan_project_sum(*args, **kwargs): ...@@ -162,7 +164,29 @@ def scan_project_sum(*args, **kwargs):
def asarrayX(value): def asarrayX(value):
return theano._asarray(value, dtype=theano.config.floatX) return theano._asarray(value, dtype=theano.config.floatX)
def clone_optimized_graph(f):
    """Clone the optimized graph of the compiled function `f`.

    The inputs of `f.maker.env` that are not shared variables are handed to
    `rebuild_collect_shared` together with the env outputs (with
    `copy_inputs_over=False`).

    :param f: a compiled function exposing `f.maker.env.inputs` and
        `f.maker.env.outputs`.
    :return: a tuple `(ins, outs)`, where `ins` is the list of inputs
        returned by `rebuild_collect_shared` that are not shared variables
        and `outs` is the outputs value it returned.
    """
    def _drop_shared(variables):
        # Keep every variable that is not a SharedVariable, preserving order.
        kept = []
        for var in variables:
            if isinstance(var, theano.tensor.sharedvar.SharedVariable):
                continue
            kept.append(var)
        return kept

    non_shared_env_inputs = _drop_shared(f.maker.env.inputs)
    rebuilt = rebuild_collect_shared(f.maker.env.outputs,
                                     non_shared_env_inputs,
                                     copy_inputs_over=False)
    # Three values are expected back; the third one is not used here.
    cloned_inputs, cloned_outputs, _ = rebuilt
    return (_drop_shared(cloned_inputs), cloned_outputs)
def grab_scan_node(output):
    """Collect the Scan apply nodes reachable upstream from `output`.

    The graph is walked backwards through `output.owner.inputs`. The walk
    stops descending as soon as a node whose op class is named 'Scan' is
    found on a branch (that node's own inputs are not explored).

    :param output: a variable exposing an `owner` attribute (None for graph
        inputs); owners expose `op` and `inputs`.
    :return: a list of the Scan apply nodes found, or None when `output`
        has no owner or when no Scan node is reachable.
    """
    owner = output.owner
    if owner is None:
        return None
    if owner.op.__class__.__name__ == 'Scan':
        return [owner]
    found = []
    for inp in owner.inputs:
        sub_nodes = grab_scan_node(inp)
        if sub_nodes is not None:
            found += sub_nodes
    # The original tested `rval is []`, an identity comparison against a
    # brand new list that is always False, so an empty list leaked out
    # instead of None. Test for emptiness instead.
    if not found:
        return None
    return found
class T_Scan(unittest.TestCase): class T_Scan(unittest.TestCase):
#class T_Scan(object): #class T_Scan(object):
...@@ -2044,16 +2068,16 @@ class T_Scan(unittest.TestCase): ...@@ -2044,16 +2068,16 @@ class T_Scan(unittest.TestCase):
o,_ = theano.reduce(lambda v,acc : acc+v, x, o,_ = theano.reduce(lambda v,acc : acc+v, x,
theano.tensor.constant(numpy.asarray(0.,dtype=theano.config.floatX)) theano.tensor.constant(numpy.asarray(0.,dtype=theano.config.floatX))
) )
mode = theano.compile.mode.FAST_RUN
#f1 = theano.function([],o) mode = mode.excluding('inplace')
f1 = theano.function([],o, mode= mode)
# Get the scan node inputs, outputs = clone_optimized_graph(f1)
#scan_node = [n for n in f1.maker.env.toposort()
# if n.op.__class__.__name__=='Scan'][0] scan_nodes = grab_scan_node(outputs[0])
# Check how much memory it uses assert scan_nodes is not None
# Can actually do that since things are hidden by the infershape scan_node = scan_nodes[0]
# mechanism f1 = theano.function(inputs, scan_node.inputs[2])
#assert scan_node.inputs[2].value.shape == () assert f1().shape[0] == 1
gx = theano.tensor.grad(o, x) gx = theano.tensor.grad(o, x)
f2 = theano.function([],gx) f2 = theano.function([],gx)
assert numpy.allclose( f2(), numpy.ones((10,))) assert numpy.allclose( f2(), numpy.ones((10,)))
...@@ -2067,15 +2091,16 @@ class T_Scan(unittest.TestCase): ...@@ -2067,15 +2091,16 @@ class T_Scan(unittest.TestCase):
theano.tensor.constant(numpy.asarray(0.,dtype=theano.config.floatX)) theano.tensor.constant(numpy.asarray(0.,dtype=theano.config.floatX))
) )
#f1 = theano.function([],o) mode = theano.compile.mode.FAST_RUN
mode = mode.excluding('inplace')
f1 = theano.function([],o, mode= mode)
inputs, outputs = clone_optimized_graph(f1)
# Get the scan node scan_nodes = grab_scan_node(outputs[0])
#scan_node = [n for n in f1.maker.env.toposort() assert scan_nodes is not None
# if n.op.__class__.__name__=='Scan'][0] scan_node = scan_nodes[0]
# Check how much memory it uses f1 = theano.function(inputs, scan_node.inputs[2])
# Can actually do that since things are hidden by the infershape assert f1().shape[0] == 1
# mechanism
#assert scan_node.inputs[2].value.shape == ()
gx = theano.tensor.grad(o, x) gx = theano.tensor.grad(o, x)
f2 = theano.function([],gx) f2 = theano.function([],gx)
assert numpy.allclose( f2(), numpy.ones((10,))) assert numpy.allclose( f2(), numpy.ones((10,)))
...@@ -2088,15 +2113,16 @@ class T_Scan(unittest.TestCase): ...@@ -2088,15 +2113,16 @@ class T_Scan(unittest.TestCase):
theano.tensor.constant(numpy.asarray(0.,dtype=theano.config.floatX)) theano.tensor.constant(numpy.asarray(0.,dtype=theano.config.floatX))
) )
#f1 = theano.function([],o) mode = theano.compile.mode.FAST_RUN
mode = mode.excluding('inplace')
f1 = theano.function([],o, mode= mode)
inputs, outputs = clone_optimized_graph(f1)
# Get the scan node scan_nodes = grab_scan_node(outputs[0])
#scan_node = [n for n in f1.maker.env.toposort() assert scan_nodes is not None
# if n.op.__class__.__name__=='Scan'][0] scan_node = scan_nodes[0]
# Check how much memory it uses f1 = theano.function(inputs, scan_node.inputs[2])
# Can actually do that since things are hidden by the infershape assert f1().shape[0] == 1
# mechanism
#assert scan_node.inputs[2].value.shape == ()
gx = theano.tensor.grad(o, x) gx = theano.tensor.grad(o, x)
f2 = theano.function([],gx) f2 = theano.function([],gx)
assert numpy.allclose( f2(), numpy.ones((10,))) assert numpy.allclose( f2(), numpy.ones((10,)))
......
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
import logging import logging
_logger = logging.getLogger('theano.tensor.opt') _logger = logging.getLogger('theano.tensor.opt')
import copy
import operator import operator
import itertools import itertools
import sys import sys
...@@ -573,6 +574,11 @@ class ShapeFeature(object): ...@@ -573,6 +574,11 @@ class ShapeFeature(object):
if hasattr(r.type,"broadcastable") and r.type.broadcastable[i]: if hasattr(r.type,"broadcastable") and r.type.broadcastable[i]:
return self.lscalar_one return self.lscalar_one
# If user provided size
elif ( hasattr(r.tag,'shape') and
r.tag.shape is not None and
r.tag.shape[i] is not None):
return T.constant(copy.copy(r.tag.shape[i]),dtype='int64')
else: else:
return Shape_i(i).make_node(r).outputs[0] return Shape_i(i).make_node(r).outputs[0]
...@@ -2740,7 +2746,14 @@ register_specialize(local_mul_specialize) ...@@ -2740,7 +2746,14 @@ register_specialize(local_mul_specialize)
@gof.local_optimizer([T.add]) @gof.local_optimizer([T.add])
def local_add_specialize(node): def local_add_specialize(node):
def fill_chain(v): def fill_chain(v):
return _fill_chain(v, node.inputs) # Not sure why this happens .. but I did not had the time to look
# into it, it probably has something to do with the dtype I'm
# providing the tag.shape of my variable
out = _fill_chain(v, node.inputs)
if out[0].dtype != node.outputs[0].dtype:
return [T.cast(out[0], dtype = node.outputs[0].dtype)]
else:
return out
#here, we are past the point of canonicalization, so we don't want to put in un-necessary fills. #here, we are past the point of canonicalization, so we don't want to put in un-necessary fills.
if node.op == T.add: if node.op == T.add:
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论