提交 cd3979e5 authored 作者: abergeron's avatar abergeron

Merge pull request #2473 from nouiz/profile_import

Profile import and crash fix
......@@ -1410,6 +1410,7 @@ class FunctionMaker(object):
# Get a function instance
start_linker = time.time()
start_import_time = theano.gof.cmodule.import_time
add_stack_trace_on_call_orig = gof.Op.add_stack_trace_on_call
limit_orig = theano.config.traceback.limit
try:
......@@ -1428,6 +1429,8 @@ class FunctionMaker(object):
if self.profile:
self.profile.linker_time += linker_time
_fn.time_thunks = self.profile.flag_time_thunks
import_time = theano.gof.cmodule.import_time - start_import_time
self.profile.import_time += import_time
fn = self.function_builder(_fn, _i, _o, self.indices, self.outputs,
defaults, self.unpack_single, self.return_none, self)
......
......@@ -104,7 +104,7 @@ def _atexit_print_fn():
for ps in to_sum[1:]:
for attr in ["compile_time", "fct_call_time", "fct_callcount",
"vm_call_time", "optimizer_time", "linker_time",
"validate_time"]:
"validate_time", "import_time"]:
setattr(cum, attr, getattr(cum, attr) + getattr(ps, attr))
# merge dictionary
......@@ -194,6 +194,9 @@ class ProfileStats(object):
linker_time = 0.0
# time spent linking graph (FunctionMaker.create)
import_time = 0.0
# time spent importing compiled Python modules.
line_width = config.profiling.output_line_width
optimizer_profile = None
......@@ -640,6 +643,7 @@ class ProfileStats(object):
print >> file, (' Theano Linker time (includes C,'
' CUDA code generation/compiling): %es' %
self.linker_time)
print >> file, ' Import time %es' % self.import_time
print >> file, ''
# The validation time is a subset of optimizer_time
......
......@@ -71,6 +71,8 @@ _logger = logging.getLogger("theano.gof.cmodule")
METH_VARARGS = "METH_VARARGS"
METH_NOARGS = "METH_NOARGS"
# Global variable that represents the total time spent importing modules.
import_time = 0
class MissingGXX(Exception):
......@@ -282,11 +284,15 @@ def dlimport(fullpath, suffix=None):
_logger.debug("module_name %s", module_name)
sys.path[0:0] = [workdir] # insert workdir at beginning (temporarily)
global import_time
try:
if importlib is not None:
if hasattr(importlib, "invalidate_caches"):
importlib.invalidate_caches()
t0 = time.time()
rval = __import__(module_name, {}, {}, [module_name])
t1 = time.time()
import_time += t1 - t0
if not rval:
raise Exception('__import__ failed', fullpath)
finally:
......
......@@ -318,7 +318,8 @@ class Container(object):
else:
self.type = r.type
if name is None:
self.name = r.name
# Some Types do not have a name field.
self.name = getattr(r, 'name', None)
else:
self.name = name
......@@ -730,9 +731,9 @@ class WrapLinker(Linker):
wrapper=self.wrapper)
return other
def clone(allow_gc=undef):
def clone(self, allow_gc=undef):
return self.__class__(
linkers=[l.clone(allow_gc=allow_gc)],
linkers=[l.clone(allow_gc=allow_gc) for l in self.linkers],
wrapper=self.wrapper)
def accept(self, fgraph, no_recycling=None):
......
......@@ -43,7 +43,7 @@ def simple_extract_stack(f=None, limit=None):
list.reverse()
return list
if sys.version_info[:2] <= (3, 2):
if sys.version_info[:2] > (3, 4):
# I enable my implementation only for some Python versions just to
# be sure the Python internals do not change. If this works with
# other Python versions, you can enable it.
......
......@@ -205,7 +205,7 @@ if __name__ == "__main__":
gpu
K6000/NOECC 0.06s 0.06s
K40 0.07s
K20m/ECC 0.08s 0.07s
K20m/ECC 0.08s 0.08s 0.07s
K20/NOECC 0.07s
M2090 0.19s
C2075 0.25s
......@@ -233,6 +233,7 @@ if __name__ == "__main__":
GTX 460 0.37s 0.45s
GTX 285 0.42s 0.452s 0.452s 0.40s # cuda 3.0 seems faster? driver version?
750M 0.49s
GT 610 2.38s
GTX 550 Ti 0.57s
GT 520 2.68s 3.06s
520M 2.44s 3.19s # with bumblebee on Ubuntu 12.04
......
......@@ -417,8 +417,8 @@ class GpuDnnConv(DnnBase, COp):
return [(
b, nb,
(h + 2*padh - kh)/sh + 1,
(w + 2*padw - kw)/sw + 1
(h + 2*padh - kh)//sh + 1,
(w + 2*padw - kw)//sw + 1
)]
......@@ -731,8 +731,8 @@ class GpuDnnPool(DnnBase):
return [(
shape[0][0],
shape[0][1],
(shape[0][2] - kh)/sh + 1,
(shape[0][3] - kw)/sw + 1
(shape[0][2] - kh)//sh + 1,
(shape[0][3] - kw)//sw + 1
)]
def c_support_code_struct(self, node, name):
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论