//If we remove the restriction img_size_byte+kern_size_byte>8*1024, we can enter a condition where occupancy is lowered due to shared memory and/or register usage.
if ((version == -1) && (out_size<64 || img_size_byte+kern_size_byte>8*1024) && out_size<=256){
self.shape_of={}# Variable -> tuple(scalars) or None (All tensor vars map to tuple)
self.scheduled={}# Variable ->
# Must be local to the object as otherwise we reuse the same
# variable for multiple env!
self.lscalar_one=T.constant(1,dtype='int64')
assertself.lscalar_one.type==T.lscalar
self.shape_of={}# Variable -> tuple(scalars) or None (All tensor vars map to tuple)
self.scheduled={}# Variable ->
fornodeinenv.toposort():
self.on_import(env,node)
...
...
@@ -725,12 +730,10 @@ class ShapeFeature(object):
'supported, and one should now use tensor.ShapeError '
'instead. The original exception message is: %s'%e)
exceptException,e:
_logger.error('Failed to infer_shape from Op %s (i_shapes=%s): %s%s'%(node.op,
_logger.error('Failed to infer_shape from Op %s.\nInput shapes:%s\nException encountered during infer_shape: %s\nException message: %s\nTraceback:%s'%(node.op,
[self.shape_of[r]forrinnode.inputs],
type(e),str(e)))
# We raise the exception to make sure the user knows something bad