# Lazy import to avoid compilation when importing theano.
# Lazy import to avoid compilation when importing theano.
fromtheano.gof.cutilsimportrun_cthunk
fromtheano.gof.cutilsimportrun_cthunk# noqa
warnings.warn(
warnings.warn(
"DEPRECATION WARNING: The ProfileMode is deprecated. Use the Theano"
"DEPRECATION WARNING: The ProfileMode is deprecated. "
" flags/parameter to theano.function 'profile=True' instead"
"Use the Theano flags/parameter to theano.function "
" of 'mode=ProfileMode'")
"'profile=True' instead of 'mode=ProfileMode'")
returnret
returnret
...
@@ -209,17 +210,34 @@ class ProfileMode(Mode):
...
@@ -209,17 +210,34 @@ class ProfileMode(Mode):
self.fn_time=0
self.fn_time=0
defprint_summary(self,**kwargs):
defprint_summary(self,**kwargs):
""" Print 3 summary that show where the time is spend. The first show an Apply-wise summary, the second show an Op-wise summary, the third show an type-Op-wise summary.
""" Print 3 summaries that show where time is spent. The first shows
an Apply-wise summary, the second an Op-wise summary and the
The Apply-wise summary print the timing information for the worst offending Apply nodes. This corresponds to individual Op applications within your graph which take the longest to execute (so if you use dot twice, you will see two entries there).
third a type-Op-wise summary.
The Op-wise summary print the execution time of all Apply nodes executing the same Op are grouped together and the total execution time per Op is shown (so if you use dot twice, you will see only one entry there corresponding to the sum of the time spent in each of them). If two Op have different hash value, they will be separate.
The type-Op-wise summary group the result by type of op. So event if two Op have different hash value, they will be merged.
The Apply-wise summary prints the timing information for the
worst offending Apply nodes. This corresponds to individual Op
Their is an hack with the Op-wise summary. Go see it if you want to know more.
applications within your graph which take the longest to
execute (so if you use dot twice, you will see two entries
there).
The Op-wise summary groups together all Apply nodes executing
the same Op and shows the total execution time per Op (so if
you use dot twice, you will see only one entry there,
corresponding to the sum of the time spent in each of them).
If two Ops have different hash values, they will be listed
separately.
The type-Op-wise summary groups the results by type of Op. So
even if two Ops have different hash values, they will be
merged.
There is a hack with the Op-wise summary. Go see it if you
want to know more.
:param kwargs: They are passed to print_summary_ expanded.
:param kwargs: They are passed to print_summary_ expanded.
Currently there is n_apply_to_print, n_ops_to_print and min_memory_size
Currently there is n_apply_to_print,
that are accepted.
n_ops_to_print and min_memory_size that are
accepted.
"""
"""
compile_time=sum([ps.compile_timeforps
compile_time=sum([ps.compile_timeforps
inself.profile_stats.values()])
inself.profile_stats.values()])
...
@@ -261,14 +279,18 @@ class ProfileMode(Mode):
...
@@ -261,14 +279,18 @@ class ProfileMode(Mode):
**kwargs)
**kwargs)
defprint_diff_summary(self,other,**kwargs):
defprint_diff_summary(self,other,**kwargs):
""" As print_summary, but print the difference on two different profile mode.
""" As print_summary, but print the difference between two different
TODO: Also we don't print the Apply-wise summary as it don't work for now.
profile mode.
TODO: Also we don't print the Apply-wise summary as it doesn't
work for now.
TODO: make comparison with gpu code.
TODO: make comparison with gpu code.
:param other: the other instance of ProfileMode that we want to be compared to.
:param other: the other instance of ProfileMode that we want
to be compared to.
:param kwargs: They are passed to print_summary_ expanded.
:param kwargs: They are passed to print_summary_ expanded.
Currently there is n_apply_to_print, n_ops_to_print and min_memory_size
Currently there is n_apply_to_print, n_ops_to_print and
that are accepted.
min_memory_size that are accepted.
"""
"""
defdiff_dict(a_time,b_time_):
defdiff_dict(a_time,b_time_):
...
@@ -331,7 +353,8 @@ class ProfileMode(Mode):
...
@@ -331,7 +353,8 @@ class ProfileMode(Mode):
print("ProfileMode is deprecated! Use the new profiler.")
print("ProfileMode is deprecated! Use the new profiler.")
print(" The Theano flags to enable it ise: profile=True")
print(" The Theano flags to enable it ise: profile=True")
print(" The Theano flags for the memory profile to it is: profile_memory=True")
print(" The Theano flags for the memory profile to it is: "
"profile_memory=True")
total_time=time.time()-import_time
total_time=time.time()-import_time
total_fct_time=sum(fct_call_time.values())
total_fct_time=sum(fct_call_time.values())
...
@@ -352,25 +375,37 @@ class ProfileMode(Mode):
...
@@ -352,25 +375,37 @@ class ProfileMode(Mode):
print('ProfileMode.%s(%s)'%(fct_name,message))
print('ProfileMode.%s(%s)'%(fct_name,message))
print('---------------------------')
print('---------------------------')
print()
print()
print('Time since import %.3fs'%(total_time))
print('Time since import %.3fs'%(total_time))
print('Theano compile time: %.3fs (%.1f%% since import)'%(compile_time,compile_time/total_time*100))
print('Theano compile time: %.3fs (%.1f%% since import)'%
sop_cimpl={}# map each op class to Bool. True iff all applies were done in c.
# map each op class to Bool. True iff all applies were done in c.
sop_cimpl={}
fora,tinop_time.items():
fora,tinop_time.items():
typ=type(a)
typ=type(a)
sop_time.setdefault(typ,0)
sop_time.setdefault(typ,0)
...
@@ -415,8 +452,11 @@ class ProfileMode(Mode):
...
@@ -415,8 +452,11 @@ class ProfileMode(Mode):
# Print the summary per op class.
# Print the summary per op class.
print()
print()
print('Single Op-wise summary:')
print('Single Op-wise summary:')
print('<% of local_time spent on this kind of Op> <cumulative %> <self seconds> <cumulative seconds> <time per call> [*] <nb_call> <nb_op> <nb_apply> <Op name>')
print('<% of local_time spent on this kind of Op> <cumulative %> '
print('\nHACK WARNING: we print the flops for some OP, but the logic don\'t always work. You need to know the internal of Theano to make it work correctly. Otherwise don\'t use!')
print("\nHACK WARNING: we print the flops for some OP, but the "
"logic doesn't always work. You need to know the "
"internals of Theano to make it work correctly. "
"Otherwise don't use it!")
print()
print()
print('Op-wise summary:')
print('Op-wise summary:')
print('<%% of local_time spent on this kind of Op> <cumulative %%> <self seconds> <cumulative seconds> <time per call> [*] %s <nb_call> <nb apply> <Op name>'%(flops_msg))
print('<%% of local_time spent on this kind of Op> <cumulative %%> '
'<self seconds> <cumulative seconds> <time per call> [*] %s '
print('<% of local_time spent at this position> <cumulative %%> <apply time> <cumulative seconds> <time per call> [*] <nb_call> <Apply position> <Apply Op name>')
print('<% of local_time spent at this position> <cumulative %%> '