提交 a87e9bb0 作者: goodfeli

Merge pull request #176 from jaberg/Composite_fixes

Composite fixes
...@@ -994,6 +994,8 @@ class FunctionMaker(object): ...@@ -994,6 +994,8 @@ class FunctionMaker(object):
try: try:
theano.config.compute_test_value = "off" theano.config.compute_test_value = "off"
start_optimizer = time.time() start_optimizer = time.time()
add_stack_trace_on_call = gof.Op.add_stack_trace_on_call
gof.Op.add_stack_trace_on_call = False
optimizer(env) optimizer(env)
end_optimizer = time.time() end_optimizer = time.time()
...@@ -1007,6 +1009,7 @@ class FunctionMaker(object): ...@@ -1007,6 +1009,7 @@ class FunctionMaker(object):
insert_deepcopy(env, inputs, outputs+additional_outputs) insert_deepcopy(env, inputs, outputs+additional_outputs)
finally: finally:
theano.config.compute_test_value = compute_test_value_orig theano.config.compute_test_value = compute_test_value_orig
gof.Op.add_stack_trace_on_call = add_stack_trace_on_call
# initialize the linker # initialize the linker
if not hasattr(linker, 'accept'): if not hasattr(linker, 'accept'):
......
...@@ -853,7 +853,8 @@ class CLinker(link.Linker): ...@@ -853,7 +853,8 @@ class CLinker(link.Linker):
libraries=self.libraries() libraries=self.libraries()
) )
@staticmethod @staticmethod
def cmodule_key_(env, no_recycling, compile_args=[], libraries=[]): def cmodule_key_(env, no_recycling, compile_args=[], libraries=[],
insert_config_md5=True):
""" """
Do the actual computation of cmodule_key in a static method Do the actual computation of cmodule_key in a static method
to allow it to be reused in scalar.Composite.__eq__ to allow it to be reused in scalar.Composite.__eq__
...@@ -871,11 +872,15 @@ class CLinker(link.Linker): ...@@ -871,11 +872,15 @@ class CLinker(link.Linker):
sig = ['CLinker.cmodule_key'] # will be cast to tuple on return sig = ['CLinker.cmodule_key'] # will be cast to tuple on return
if compile_args is not None: sig.append(tuple(compile_args)) if compile_args is not None: sig.append(tuple(compile_args))
if libraries is not None: sig.append(tuple(libraries)) if libraries is not None: sig.append(tuple(libraries))
# IMPORTANT: The 'md5' prefix is used to isolate the compilation # IMPORTANT: The 'md5' prefix is used to isolate the compilation
# parameters from the rest of the key. If you want to add more key # parameters from the rest of the key. If you want to add more key
# elements, they should be before this md5 hash if and only if they # elements, they should be before this md5 hash if and only if they
# can lead to a different compiled file with the same source code. # can lead to a different compiled file with the same source code.
sig.append('md5:' + theano.configparser.get_config_md5()) if insert_config_md5:
sig.append('md5:' + theano.configparser.get_config_md5())
else:
sig.append('md5: <omitted>')
# technically this should only be appended for gcc-compiled Ops # technically this should only be appended for gcc-compiled Ops
# and the flags of other compilers should be inserted here... but it's not clear how to # and the flags of other compilers should be inserted here... but it's not clear how to
......
...@@ -955,9 +955,10 @@ class ModuleCache(object): ...@@ -955,9 +955,10 @@ class ModuleCache(object):
if found == 0: if found == 0:
msg = 'Key not found in unpickled KeyData file' msg = 'Key not found in unpickled KeyData file'
if key_data.keys: if key_data.keys:
# This is only to make debugging in pdb easier, by providing # This is to make debugging in pdb easier, by providing
# the offending key in the local context. # the offending keys in the local context.
other_key = key_data.keys.__iter__().next() key_data_keys = list(key_data.keys)
## import pdb; pdb.set_trace()
elif found > 1: elif found > 1:
msg = 'Multiple equal keys found in unpickled KeyData file' msg = 'Multiple equal keys found in unpickled KeyData file'
if msg: if msg:
......
...@@ -310,6 +310,13 @@ class PureOp(object): ...@@ -310,6 +310,13 @@ class PureOp(object):
""" """
add_stack_trace_on_call = True
"""This class variable governs whether __call__ adds a stack trace to the node it creates.
The tag trace is meant to connect a node to the line a user typed. It is nice for
debugging. It does not make as much sense during optimizations to store this information.
"""
############# #############
# make_node # # make_node #
############# #############
...@@ -367,7 +374,8 @@ class PureOp(object): ...@@ -367,7 +374,8 @@ class PureOp(object):
""" """
node = self.make_node(*inputs, **kwargs) node = self.make_node(*inputs, **kwargs)
self.add_tag_trace(node) if self.add_stack_trace_on_call:
self.add_tag_trace(node)
if config.compute_test_value != 'off': if config.compute_test_value != 'off':
run_perform = True run_perform = True
......
...@@ -38,6 +38,10 @@ class NaiveAlgo(object): ...@@ -38,6 +38,10 @@ class NaiveAlgo(object):
:param scalar_op: the scalar operation to execute on each element. :param scalar_op: the scalar operation to execute on each element.
:param sync: if True, will wait after the kernel launch and check for error call. :param sync: if True, will wait after the kernel launch and check for error call.
""" """
if scalar_op.c_support_code_apply(node=None, nodename="nodename"):
raise ValueError(('It is currently not possible to auto-generate'
' a GPU implementation for an elementwise Op with support'
' code'), scalar_op)
self.scalar_op = scalar_op self.scalar_op = scalar_op
self.sync = sync self.sync = sync
self.inplace_pattern = inplace_pattern self.inplace_pattern = inplace_pattern
...@@ -799,12 +803,15 @@ nd_collapse_[i]=0; ...@@ -799,12 +803,15 @@ nd_collapse_[i]=0;
def c_support_code_apply(self, node, nodename): def c_support_code_apply(self, node, nodename):
nd = node.outputs[0].type.ndim nd = node.outputs[0].type.ndim
return "".join( defines = """
[self.c_src_kernel(node, nodename,x) for x in xrange(1,nd+1)]+ #define INTDIV_POW2(a, b) (a >> b)
[ #define INTMOD_POW2(a, b) (a & ((1<<b)-1))
self.c_src_kernel_Ccontiguous(node, nodename), """
self.c_src_callkernel(node, nodename), kernels = "".join(
]) [self.c_src_kernel(node, nodename, x) for x in xrange(1, nd + 1)]
+ [self.c_src_kernel_Ccontiguous(node, nodename)],
+ [self.c_src_callkernel(node, nodename)])
return defines + kernels
def c_code(self, node, nodename, inputs, outputs, sub): def c_code(self, node, nodename, inputs, outputs, sub):
d = dict(sub) d = dict(sub)
...@@ -951,8 +958,3 @@ nd_collapse_[i]=0; ...@@ -951,8 +958,3 @@ nd_collapse_[i]=0;
#print sio.getvalue() #print sio.getvalue()
return sio.getvalue() return sio.getvalue()
def c_support_code(self):
return """
#define INTDIV_POW2(a, b) (a >> b)
#define INTMOD_POW2(a, b) (a & ((1<<b)-1))
"""
...@@ -37,13 +37,13 @@ gpu_seqopt.register('gpu_cut_transfers', gpu_cut_copies, 2, ...@@ -37,13 +37,13 @@ gpu_seqopt.register('gpu_cut_transfers', gpu_cut_copies, 2,
optdb.register('gpu_opt', optdb.register('gpu_opt',
gpu_seqopt, gpu_seqopt,
optdb.__position__.get('add_destroy_handler', 49.5) - 1, optdb.__position__.get('add_destroy_handler', 49.5) - 1,
'gpu') 'gpu', 'fast_run')
# This second pass is needed as the fusion can put all the non float32 code # This second pass is needed as the fusion can put all the non float32 code
# inside the elemwise. When it there is no float64 op, this is working. # inside the elemwise. When it there is no float64 op, this is working.
optdb.register('gpu_after_fusion', optdb.register('gpu_after_fusion',
ProxyDB(gpu_seqopt), ProxyDB(gpu_seqopt),
optdb.__position__.get('elemwise_fusion', 71) + .1, optdb.__position__.get('elemwise_fusion', 71) + .1,
'gpu') 'gpu', 'fast_run')
def register_opt(*tags, **kwargs): def register_opt(*tags, **kwargs):
def f(local_opt): def f(local_opt):
...@@ -144,7 +144,11 @@ def local_gpu_elemwise_0(node): ...@@ -144,7 +144,11 @@ def local_gpu_elemwise_0(node):
if numpy.all([o.type.dtype == 'float32' for o in node.outputs]): if numpy.all([o.type.dtype == 'float32' for o in node.outputs]):
# Don't set any inplace pattern. # Don't set any inplace pattern.
# gpu_inplace_elemwise_optimizer will do it later # gpu_inplace_elemwise_optimizer will do it later
new_op = GpuElemwise(node.op.scalar_op) try:
new_op = GpuElemwise(node.op.scalar_op)
except ValueError:
# This happens when scalar_op requires support code
return False
# first establish that float32 can store all inputs # first establish that float32 can store all inputs
upcastable = set(['float32', 'int8', 'int16', 'uint8', 'uint16']) upcastable = set(['float32', 'int8', 'int16', 'uint8', 'uint16'])
...@@ -188,7 +192,11 @@ def local_gpu_elemwise_1(node): ...@@ -188,7 +192,11 @@ def local_gpu_elemwise_1(node):
elemwise_node = host_i.owner elemwise_node = host_i.owner
# Don't set any inplace pattern. # Don't set any inplace pattern.
# gpu_inplace_elemwise_optimizer will do it later # gpu_inplace_elemwise_optimizer will do it later
new_op = GpuElemwise(elemwise_node.op.scalar_op) try:
new_op = GpuElemwise(elemwise_node.op.scalar_op)
except ValueError:
# This happens when scalar_op requires support code
return False
if all([i.dtype=='float32' for i in elemwise_node.inputs]): if all([i.dtype=='float32' for i in elemwise_node.inputs]):
gpu_elemwise = new_op(*[gpu_from_host(i) for i in elemwise_node.inputs]) gpu_elemwise = new_op(*[gpu_from_host(i) for i in elemwise_node.inputs])
gpu_elemwise = split_huge_add_or_mul(gpu_elemwise.owner) gpu_elemwise = split_huge_add_or_mul(gpu_elemwise.owner)
......
差异被折叠。
...@@ -208,5 +208,9 @@ class test_div(unittest.TestCase): ...@@ -208,5 +208,9 @@ class test_div(unittest.TestCase):
assert isinstance((a/c).owner.op, TrueDiv) assert isinstance((a/c).owner.op, TrueDiv)
# Testing of Composite is done in tensor/tests/test_opt.py
# in test_fusion, TestCompositeCodegen
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -1081,6 +1081,7 @@ def tensor4(name=None, dtype=None): ...@@ -1081,6 +1081,7 @@ def tensor4(name=None, dtype=None):
tensor4s, ftensor4s, dtensor4s, itensor4s, ltensor4s = _multi(tensor4, ftensor4, dtensor4, tensor4s, ftensor4s, dtensor4s, itensor4s, ltensor4s = _multi(tensor4, ftensor4, dtensor4,
itensor4, ltensor4) itensor4, ltensor4)
class _tensor_py_operators: class _tensor_py_operators:
#UNARY #UNARY
def __abs__(self): return abs_(self) def __abs__(self): return abs_(self)
...@@ -1370,10 +1371,14 @@ class _tensor_py_operators: ...@@ -1370,10 +1371,14 @@ class _tensor_py_operators:
def get_constant_value(self): def get_constant_value(self):
return get_constant_value(self) return get_constant_value(self)
class TensorVariable(_tensor_py_operators, Variable): class TensorVariable(_tensor_py_operators, Variable):
"""Subclass to add the tensor operators to the basic `Variable` class.""" """Subclass to add the tensor operators to the basic `Variable` class."""
TensorType.Variable = TensorVariable TensorType.Variable = TensorVariable
class TensorConstantSignature(tuple): class TensorConstantSignature(tuple):
"""A Signature object for comparing TensorConstant instances """A Signature object for comparing TensorConstant instances
...@@ -1497,7 +1502,8 @@ class TensorValue(_tensor_py_operators, Value): ...@@ -1497,7 +1502,8 @@ class TensorValue(_tensor_py_operators, Value):
Tensor = TensorType Tensor = TensorType
#QUESTION: why are we doing this!?
# This bizarre push-import avoids a circular dependency.
elemwise.as_tensor_variable = as_tensor_variable elemwise.as_tensor_variable = as_tensor_variable
elemwise.TensorType = TensorType elemwise.TensorType = TensorType
elemwise.TensorVariable = TensorVariable elemwise.TensorVariable = TensorVariable
...@@ -1505,29 +1511,10 @@ elemwise.TensorConstant = TensorConstant ...@@ -1505,29 +1511,10 @@ elemwise.TensorConstant = TensorConstant
elemwise.TensorValue = TensorValue elemwise.TensorValue = TensorValue
######################### #########################
# Utilities # Utilities
######################### #########################
def _elemwise(scalar_op, name, doc_prefix=''):
straight = elemwise.Elemwise(scalar_op, name = name)
inplace_scalar_op = scalar_op.__class__(scal.transfer_type(0))
inplace = elemwise.Elemwise(inplace_scalar_op, {0: 0}, name = name+"_inplace")
# don't add the inplace versions, they aren't supposed to be part of the user interface
_constructor_list.append(straight)
# This is here so that gen_oplist can detect which module declared these variables.
straight.__module__ = 'tensor'
inplace.__module__ = 'tensor'
if doc_prefix:
straight.__doc__ = doc_prefix + '\n' + straight.__doc__
return straight, inplace
def _redefine(real_symbol_value, module='tensor'): def _redefine(real_symbol_value, module='tensor'):
"""Replace the value associated with a function symbol. """Replace the value associated with a function symbol.
...@@ -1538,12 +1525,14 @@ def _redefine(real_symbol_value, module='tensor'): ...@@ -1538,12 +1525,14 @@ def _redefine(real_symbol_value, module='tensor'):
return real_symbol_value return real_symbol_value
return decorator return decorator
def _redefine_asRoutine(real_symbol_value): def _redefine_asRoutine(real_symbol_value):
real_symbol_value.__epydoc_asRoutine = True real_symbol_value.__epydoc_asRoutine = True
def decorator(f): def decorator(f):
return real_symbol_value return real_symbol_value
return decorator return decorator
def _scal_elemwise_with_nfunc(nfunc, nin, nout): def _scal_elemwise_with_nfunc(nfunc, nin, nout):
""" """
Replace a symbol definition with an elementwise version of the Replace a symbol definition with an elementwise version of the
......
...@@ -793,7 +793,7 @@ class Elemwise(Op): ...@@ -793,7 +793,7 @@ class Elemwise(Op):
rval.append(tuple(oshp)) rval.append(tuple(oshp))
return rval return rval
def _c_all(self, node, name, inames, onames, sub): def _c_all(self, node, nodename, inames, onames, sub):
_inames = inames _inames = inames
_onames = onames _onames = onames
...@@ -901,7 +901,7 @@ class Elemwise(Op): ...@@ -901,7 +901,7 @@ class Elemwise(Op):
Apply(self.scalar_op, Apply(self.scalar_op,
[Scalar(dtype = input.type.dtype)() for input in node.inputs], [Scalar(dtype = input.type.dtype)() for input in node.inputs],
[Scalar(dtype = output.type.dtype)() for output in node.outputs]), [Scalar(dtype = output.type.dtype)() for output in node.outputs]),
name + '_scalar_', nodename + '_scalar_',
["%s_i" % s for s in _inames], ["%s_i" % s for s in _inames],
["%s_i" % s for s in onames], ["%s_i" % s for s in onames],
sub) sub)
...@@ -922,19 +922,20 @@ class Elemwise(Op): ...@@ -922,19 +922,20 @@ class Elemwise(Op):
sub = sub) sub = sub)
return decl, checks, alloc, loop return decl, checks, alloc, loop
def c_code(self, node, name, inames, onames, sub): def c_code(self, node, nodename, inames, onames, sub):
code = "\n".join(self._c_all(node, name, inames, onames, sub)) code = "\n".join(self._c_all(node, nodename, inames, onames, sub))
return code return code
def c_headers(self): def c_headers(self):
return ['<vector>', '<algorithm>'] return ['<vector>', '<algorithm>']
def c_support_code(self): def c_support_code_apply(self, node, nodename):
support_code = self.scalar_op.c_support_code() support_code = self.scalar_op.c_support_code_apply(node,
nodename + '_scalar_')
return support_code return support_code
def c_code_cache_version_apply(self, node): def c_code_cache_version_apply(self, node):
version = [5] # the version corresponding to the c code in this Op version = [6] # the version corresponding to the c code in this Op
# now we insert versions for the ops on which we depend... # now we insert versions for the ops on which we depend...
scalar_node = Apply(self.scalar_op, scalar_node = Apply(self.scalar_op,
......
...@@ -629,18 +629,23 @@ class ShapeFeature(object): ...@@ -629,18 +629,23 @@ class ShapeFeature(object):
""" """
def shape_ir(self, i, r): def shape_ir(self, i, r):
#TODO: Write a doc string for this method """Return symbolic r.shape[i] for tensor variable r, int i"""
if hasattr(r.type,"broadcastable") and r.type.broadcastable[i]: if hasattr(r.type,"broadcastable") and r.type.broadcastable[i]:
return self.lscalar_one return self.lscalar_one
else: else:
return Shape_i(i).make_node(r).outputs[0] return Shape_i(i).make_node(r).outputs[0]
def shape_tuple(self, r): def shape_tuple(self, r):
#TODO: Write a doc string for this method """Return a tuple of symbolic shape vars for tensor variable r"""
return tuple([self.shape_ir(i,r) for i in xrange(r.ndim)]) return tuple([self.shape_ir(i,r) for i in xrange(r.ndim)])
def default_infer_shape(self, node, i_shapes): def default_infer_shape(self, node, i_shapes):
"""Return a list of shape tuple or None for the outputs of node.
This function is used for Ops that don't implement infer_shape.
Ops that do implement infer_shape should use the i_shapes parameter,
but this default implementation ignores it.
"""
rval = [] rval = []
for r in node.outputs: for r in node.outputs:
try: try:
...@@ -650,16 +655,21 @@ class ShapeFeature(object): ...@@ -650,16 +655,21 @@ class ShapeFeature(object):
return rval return rval
def unpack(self, s_i): def unpack(self, s_i):
"""Return a symbolic integer scalar for the shape element s_i.
The s_i argument was produced by the infer_shape() of an Op subclass.
"""
# unpack the s_i that the Op returned # unpack the s_i that the Op returned
assert s_i is not None assert s_i is not None
if s_i == 1: if s_i == 1:
# don't make the optimizer merge a zillion ones together # don't make the optimizer merge a zillion ones together
# by always returning the same object to represent 1
return self.lscalar_one return self.lscalar_one
if type(s_i) in (int,long) or isinstance(s_i, numpy.integer): if type(s_i) in (int,long) or isinstance(s_i, numpy.integer):
# this shape is a constant # this shape is a constant
assert s_i >= 0 assert s_i >= 0
return T.constant(s_i, dtype='int64') return T.constant(s_i, dtype='int64')
if type(s_i) in (tuple,list): if type(s_i) in (tuple, list):
# this dimension is the same as many of the inputs # this dimension is the same as many of the inputs
# which tells us that if one of the inputs is known, # which tells us that if one of the inputs is known,
# the others all become known. # the others all become known.
...@@ -676,11 +686,19 @@ class ShapeFeature(object): ...@@ -676,11 +686,19 @@ class ShapeFeature(object):
s_i, type(s_i), getattr(s_i, 'type', None)) s_i, type(s_i), getattr(s_i, 'type', None))
def set_shape(self, r, s): def set_shape(self, r, s):
"""Assign the shape `s` to previously un-shaped variable `r`.
:type r: a variable
:type s: None or a tuple of symbolic integers
"""
assert r not in self.shape_of, 'r already in shape_of' assert r not in self.shape_of, 'r already in shape_of'
if s is None: if s is None:
self.shape_of[r] = s self.shape_of[r] = s
else: else:
self.shape_of[r] = tuple([self.unpack(s_i) for s_i in s]) shape_vars = [self.unpack(s_i) for s_i in s]
self.shape_of[r] = tuple(shape_vars)
for sv in shape_vars:
self.shape_of_reverse_index.setdefault(sv, set()).add(r)
def update_shape(self, r, other_r): def update_shape(self, r, other_r):
'''Replace shape of r by shape of other_r. '''Replace shape of r by shape of other_r.
...@@ -692,16 +710,17 @@ class ShapeFeature(object): ...@@ -692,16 +710,17 @@ class ShapeFeature(object):
assert other_r in self.shape_of, ('other_r not in shape_of', other_r) assert other_r in self.shape_of, ('other_r not in shape_of', other_r)
other_shape = self.shape_of[other_r] other_shape = self.shape_of[other_r]
# If other_shape has no information, call is pointless.
if other_shape is None:
return
if r in self.shape_of: if r in self.shape_of:
r_shape = self.shape_of[r] r_shape = self.shape_of[r]
else: else:
# If no info is known on r's shape, use other_shape # If no info is known on r's shape, use other_shape
self.shape_of[r] = other_shape self.shape_of[r] = other_shape
return for sv in other_shape:
self.shape_of_reverse_index.setdefault(sv, set()).add(r)
# If other_shape has no information, use r_shape
if other_shape is None:
self.shape_of[r] = r_shape
return return
# Merge other_shape with r_shape, giving the priority to other_shape # Merge other_shape with r_shape, giving the priority to other_shape
...@@ -711,14 +730,16 @@ class ShapeFeature(object): ...@@ -711,14 +730,16 @@ class ShapeFeature(object):
# For now, we consider 2 cases of uninformative other_shape[i]: # For now, we consider 2 cases of uninformative other_shape[i]:
# - Shape_i(i)(other_r); # - Shape_i(i)(other_r);
# - Shape_i(i)(r). # - Shape_i(i)(r).
if (ps.owner and if (ps.owner
isinstance(getattr(ps.owner,'op',None), Shape_i) and and isinstance(getattr(ps.owner, 'op', None), Shape_i)
ps.owner.op.i == i and and ps.owner.op.i == i
ps.owner.inputs[0] in (r, other_r)): and ps.owner.inputs[0] in (r, other_r)):
merged_shape.append(r_shape[i]) merged_shape.append(r_shape[i])
else: else:
merged_shape.append(other_shape[i]) merged_shape.append(other_shape[i])
self.shape_of[r] = tuple(merged_shape) self.shape_of[r] = tuple(merged_shape)
for sv in self.shape_of[r]:
self.shape_of_reverse_index.setdefault(sv, set()).add(r)
def set_shape_i(self, r, i, s_i): def set_shape_i(self, r, i, s_i):
'''Replace element i of shape_of[r] by s_i''' '''Replace element i of shape_of[r] by s_i'''
...@@ -733,14 +754,16 @@ class ShapeFeature(object): ...@@ -733,14 +754,16 @@ class ShapeFeature(object):
else: else:
new_shape.append(s_j) new_shape.append(s_j)
self.shape_of[r] = tuple(new_shape) self.shape_of[r] = tuple(new_shape)
for sv in self.shape_of[r]:
self.shape_of_reverse_index.setdefault(sv, set()).add(r)
def init_r(self, r): def init_r(self, r):
'''Register r's shape in the shape_of dictionary.''' '''Register r's shape in the shape_of dictionary.'''
if r not in self.shape_of: if r not in self.shape_of:
try: try:
self.set_shape(r, self.shape_tuple(r)) self.set_shape(r, self.shape_tuple(r))
except AttributeError: except AttributeError: #XXX: where would this come from?
self.set_shape(r,None) self.set_shape(r, None)
def make_vector_shape(self, r): def make_vector_shape(self, r):
return make_vector(*self.shape_of[r]) return make_vector(*self.shape_of[r])
...@@ -757,8 +780,15 @@ class ShapeFeature(object): ...@@ -757,8 +780,15 @@ class ShapeFeature(object):
self.lscalar_one = T.constant(1, dtype='int64') self.lscalar_one = T.constant(1, dtype='int64')
assert self.lscalar_one.type == T.lscalar assert self.lscalar_one.type == T.lscalar
self.shape_of = {} # Variable -> tuple(scalars) or None (All tensor vars map to tuple) self.shape_of = {}
self.scheduled = {} # Variable -> # Variable -> tuple(scalars) or None (All tensor vars map to tuple)
self.scheduled = {}
# Variable ->
self.shape_of_reverse_index = {}
# shape var -> graph v
for node in env.toposort(): for node in env.toposort():
self.on_import(env, node) self.on_import(env, node)
...@@ -798,9 +828,11 @@ class ShapeFeature(object): ...@@ -798,9 +828,11 @@ class ShapeFeature(object):
# this is packed information # this is packed information
# an element of o_shapes is either None or a tuple # an element of o_shapes is either None or a tuple
# elements of the tuple can be either strings, or ints # elements of the tuple can be either strings, or ints
if len(o_shapes) != len(node.outputs): if len(o_shapes) != len(node.outputs):
raise Exception('len(o_shapes) = '+str(len(o_shapes))+' != len(node.outputs) = '+str(len(node.outputs))) raise Exception('len(o_shapes) = '
+ str(len(o_shapes))
+ ' != len(node.outputs) = '
+ str(len(node.outputs)))
for r, s in zip(node.outputs, o_shapes): for r, s in zip(node.outputs, o_shapes):
self.set_shape(r, s) self.set_shape(r, s)
...@@ -818,23 +850,28 @@ class ShapeFeature(object): ...@@ -818,23 +850,28 @@ class ShapeFeature(object):
# the shape of new_r. Say that r is *scheduled*. # the shape of new_r. Say that r is *scheduled*.
# At that point, node is no longer a client of r, but of new_r # At that point, node is no longer a client of r, but of new_r
for (shpnode, idx) in (r.clients + [(node, i)]): for (shpnode, idx) in (r.clients + [(node, i)]):
if isinstance(getattr(shpnode,'op', None), Shape_i): if isinstance(getattr(shpnode, 'op', None), Shape_i):
self.scheduled[shpnode] = new_r self.scheduled[shpnode] = new_r
# In case 2, if r is a variable that we've scheduled for shape update, then we # In case 2, if r is a variable that we've scheduled for shape update, then we
# should cancel it. # should cancel it.
# TODO: store some kind of reverse index? unscheduled = [k for k, v in self.scheduled.items() if v == r]
for k,v in self.scheduled.items(): for k in unscheduled:
if v == r: del self.scheduled[k]
del self.scheduled[k]
# In either case, r could be in shape_of.values(), that is, r itself # In either case, r could be in shape_of.values(), that is, r itself
# is the shape of something. In that case, we want to update # is the shape of something. In that case, we want to update
# the value in shape_of, to keep it up-to-date. # the value in shape_of, to keep it up-to-date.
for k,v in self.shape_of.iteritems(): for v in self.shape_of_reverse_index.get(r, []):
if v is not None: # The reverse index is only approximate. It is not updated on
for ii, vi in enumerate(v): # deletion of variables, or on change_input so it might be the
if vi == r: # case that there are a few extra `v`'s in it that no longer have
self.set_shape_i(k, ii, new_r) # a shape of r or possibly have been deleted from shape_of
# entirely. The important thing is that it permits to recall
# all variables with r in their shape.
for ii, svi in enumerate(self.shape_of.get(v, [])):
if svi == r:
self.set_shape_i(v, ii, new_r)
self.shape_of_reverse_index[r] = set()
class ShapeOptimizer(Optimizer): class ShapeOptimizer(Optimizer):
"""Optimizer that serves to add ShapeFeature as an env feature. """Optimizer that serves to add ShapeFeature as an env feature.
...@@ -926,6 +963,7 @@ def local_track_shape_i(node): ...@@ -926,6 +963,7 @@ def local_track_shape_i(node):
if node in shape_feature.scheduled: if node in shape_feature.scheduled:
assert isinstance(node.op, Shape_i) assert isinstance(node.op, Shape_i)
replacement = shape_feature.scheduled[node] replacement = shape_feature.scheduled[node]
# XXX: what the heck is up with node.op.i ???
return [shape_feature.shape_of[replacement][node.op.i]] return [shape_feature.shape_of[replacement][node.op.i]]
@register_specialize @register_specialize
......
...@@ -767,8 +767,8 @@ class test_fusion(unittest.TestCase): ...@@ -767,8 +767,8 @@ class test_fusion(unittest.TestCase):
cases = [ cases = [
(fx+fy+fz,(fx,fy,fz),(fxv,fyv,fzv),1,fxv+fyv+fzv,'float32'),#0 (fx+fy+fz,(fx,fy,fz),(fxv,fyv,fzv),1,fxv+fyv+fzv,'float32'),#0
(fx*fy*fz,(fx,fy,fz),(fxv,fyv,fzv),1,fxv*fyv*fzv,'float32'),#1 (fx*fy*fz,(fx,fy,fz),(fxv,fyv,fzv),1,fxv*fyv*fzv,'float32'),#1
(fx+fy*fz,(fx,fy,fz),(fxv,fyv,fzv),1,fxv+fyv*fzv,'float32'), (fx+fy*fz,(fx,fy,fz),(fxv,fyv,fzv),1,fxv+fyv*fzv,'float32'),#2
(fx*fy+fz,(fx,fy,fz),(fxv,fyv,fzv),1,fxv*fyv+fzv,'float32'), (fx*fy+fz,(fx,fy,fz),(fxv,fyv,fzv),1,fxv*fyv+fzv,'float32'),#3
(fw+fx+fy+fz,(fw,fx,fy,fz),(fwv,fxv,fyv,fzv),1,fwv+fxv+fyv+fzv,'float32'), (fw+fx+fy+fz,(fw,fx,fy,fz),(fwv,fxv,fyv,fzv),1,fwv+fxv+fyv+fzv,'float32'),
((fw+fx)+(fy+fz),(fw,fx,fy,fz),(fwv,fxv,fyv,fzv),1,fwv+fxv+fyv+fzv,'float32'),#5 ((fw+fx)+(fy+fz),(fw,fx,fy,fz),(fwv,fxv,fyv,fzv),1,fwv+fxv+fyv+fzv,'float32'),#5
(((fw+fx)+fy)+fz,(fw,fx,fy,fz),(fwv,fxv,fyv,fzv),1,fwv+fxv+fyv+fzv,'float32'), (((fw+fx)+fy)+fz,(fw,fx,fy,fz),(fwv,fxv,fyv,fzv),1,fwv+fxv+fyv+fzv,'float32'),
...@@ -891,11 +891,19 @@ class test_fusion(unittest.TestCase): ...@@ -891,11 +891,19 @@ class test_fusion(unittest.TestCase):
t1=time.time() t1=time.time()
out=out.get_value() out=out.get_value()
#print "CASE2/3", f.maker.env.toposort()
#print 'CASE2/3', f.maker.env
#print 'CASE2/3', f.maker.env.toposort()[3].op.scalar_op.env
times[id]=t1-t0 times[id]=t1-t0
atol=1e-8 atol=1e-8
if out_dtype=='float32':atol=1e-6 if out_dtype=='float32':atol=1e-6
if not numpy.allclose(out,answer*nb_repeat,atol=atol): if not numpy.allclose(out,answer*nb_repeat,atol=atol):
fail1.append(id) fail1.append(id)
print val_inputs
print out
print answer*nb_repeat
#assert 0
topo=f.maker.env.toposort() topo=f.maker.env.toposort()
if gpu: if gpu:
import theano.sandbox.cuda as cuda import theano.sandbox.cuda as cuda
...@@ -1109,6 +1117,70 @@ class test_fusion(unittest.TestCase): ...@@ -1109,6 +1117,70 @@ class test_fusion(unittest.TestCase):
# cases[id]=None #to remove g, that link to out that link to the ndarray! # cases[id]=None #to remove g, that link to out that link to the ndarray!
#g.owner.inputs[0] is out... make owner a weakref? #g.owner.inputs[0] is out... make owner a weakref?
class TestCompositeCodegen(unittest.TestCase):
"""
Test The Composite Ops code generation in a case where there is multiple
scalar ops with support code.
"""
def setUp(self):
class TimesN(theano.scalar.basic.UnaryScalarOp):
def __init__(self, n, *args, **kwargs):
self.n = n
theano.scalar.basic.UnaryScalarOp.__init__(self, *args, **kwargs)
def impl(self, x):
return x * self.n
def c_support_code_apply(self, node, nodename):
n = str(self.n)
return """
float %(nodename)s_timesn(float x) { return x * %(n)s; }
""" % locals()
def c_code(self, node, name, (x, ), (z, ), sub):
return "%(z)s = %(name)s_timesn(%(x)s);" % locals()
upgrade_to_float = theano.scalar.basic.upgrade_to_float
self.scal_times_2 = TimesN(2, upgrade_to_float, name='times_2')
self.times_2 = theano.tensor.elemwise.Elemwise(
self.scal_times_2,
name='times_2')
self.scal_times_3 = TimesN(3, upgrade_to_float, name='times_3')
self.times_3 = theano.tensor.elemwise.Elemwise(
self.scal_times_3,
name='times_3')
self.x = fvector()
def test_nested_composite(self):
y = self.times_2(self.x)
z = self.times_3(y)
f = function([self.x], z)
assert len(f.maker.env.toposort()) == 1
fval = f([1, 2, 3])
assert numpy.all(fval == [6, 12, 18])
def test_nested_gpu(self):
import theano.sandbox.cuda as cuda
if not cuda.cuda_available:
raise SkipTest("cuda not available")
import theano.sandbox.cuda.opt
y = self.times_2(self.x)
z = self.times_3(y)
f = theano.function([self.x], cuda.gpu_from_host(z))
topo = f.maker.env.toposort()
assert len(topo) == 2
assert topo[1].op == cuda.gpu_from_host
# topo1 is doing the composite work on the CPU. Auto-generation of
# GPU code for ops with support code is not possible.
fval = numpy.asarray(f([1, 2, 3]))
assert numpy.all(fval == [6, 12, 18]), fval
def test_log1p(): def test_log1p():
m = theano.config.mode m = theano.config.mode
if m == 'FAST_COMPILE': if m == 'FAST_COMPILE':
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论