Improved overall presentation

f302a037 · Olivier Delalleau · 93f2c09c · f302a037
--- a/theano/misc/check_blas.py
+++ b/theano/misc/check_blas.py
@@ -14,6 +14,7 @@ GTX 480 5.83s
 import os
 import sys
 import time
+from optparse import OptionParser

 import numpy
 import theano
@@ -25,13 +26,13 @@ from theano.gof.python25 import any
 def execute(execute=True, verbose=True, M=2000, N=2000, K=2000,
            iters=10, order='C'):
    """
-    :param execute: If True, execute a Theano function that should call gemm
-    :param verbose: If True, will print some Theano flags and env variable.
-    :param M,N,K: the M,N,K size used by gemm
-    :param iters: the number of call to gemm to do
+    :param execute: If True, execute a Theano function that should call gemm.
+    :param verbose: If True, will print some Theano flags and env variables.
+    :param M,N,K: The M,N,K size used by gemm.
+    :param iters: The number of calls to gemm to do.

    :return: a tuple (execution time,
-                      str that represent the implementation used)
+                      str that represents the implementation used)
    """

    a = theano.shared(numpy.ones((M, N), dtype=theano.config.floatX,
@@ -43,20 +44,20 @@ def execute(execute=True, verbose=True, M=2000, N=2000, K=2000,
    f = theano.function([], updates={c: 0.4 * c + .8 * T.dot(a, b)})

    if verbose:
-        print 'Some theano flags:'
+        print 'Some Theano flags:'
        print '    blas.ldflags=', theano.config.blas.ldflags
        print '    compiledir=', theano.config.compiledir
        print '    floatX=', theano.config.floatX
-        print 'Some env flags:'
+        print 'Some environment variables:'
        print '    MKL_NUM_THREADS=', os.getenv('MKL_NUM_THREADS')
        print '    OMP_NUM_THREADS=', os.getenv('OMP_NUM_THREADS')
        print '    GOTO_NUM_THREADS=', os.getenv('GOTO_NUM_THREADS')
        print
-        print ('Numpy config: (used when the theano flags'
+        print ('Numpy config: (used when the Theano flag'
               ' "blas.ldflags" is empty)')
        numpy.show_config()
        print 'Numpy dot module:', numpy.dot.__module__
-        print 'Numpy file location that was loaded:', numpy.__file__
+        print 'Numpy location:', numpy.__file__
        print 'Numpy version:', numpy.__version__
        print
    t0 = 0
@@ -69,8 +70,8 @@ def execute(execute=True, verbose=True, M=2000, N=2000, K=2000,
              f.maker.env.toposort()]):
        impl = 'Used the gpu'
    else:
-        impl = 'ERROR, not able to tell if theano used the cpu or the gpu'
-        impl += f.maker.env.toposort()
+        impl = 'ERROR, unable to tell if Theano used the cpu or the gpu:\n'
+        impl += str(f.maker.env.toposort())

    if execute:
        t0 = time.time()
@@ -88,15 +89,18 @@ def jobman_job(state, channel):
 def test():
    execute()

-from optparse import OptionParser
-parser = OptionParser(usage='%prog <options>')
+
+parser = OptionParser(
+        usage='%prog <options>\nCompute time needed to perform BLAS gemm '
+              'computations between matrices of size (M, N) and (N, K).')

 parser.add_option('-q', '--quiet', action='store_true', dest='quiet',
                  default=False,
-                  help="If true, don't print the comparison table")
+                  help="If true, do not print the comparison table and config "
+                       "options")
 parser.add_option('--print_only', action='store_true', dest='print_only',
                  default=False,
-                  help="If true, don't do the gemm call")
+                  help="If true, do not perform gemm computations")
 parser.add_option('-M', '--M', action='store', dest='M',
                  default=2000, type="int",
                  help="The M size to gemm")
@@ -108,13 +112,13 @@ parser.add_option('-K', '--K', action='store', dest='K',
                  help="The K size to gemm")
 parser.add_option('--iter', action='store', dest='iter',
                  default=10, type="int",
-                  help="The number of call to gemm")
+                  help="The number of calls to gemm")
 parser.add_option('--order', action='store', dest='order',
                  default="C",
-                  help="The numpy order parameter used when creating the"
-                  " numpy.ndarray object. It accept 'C' for the c memory"
-                  " layout order and 'F' for the fortran order of all"
-                  " matrix.")
+                  help="The numpy memory layout parameter used when creating"
+                  " the numpy.ndarray objects. It accepts 'C' for C memory"
+                  " order and 'F' for Fortran order (for all matrices).")
+

 if __name__ == "__main__":
    options, arguments = parser.parse_args(sys.argv)
@@ -127,7 +131,7 @@ if __name__ == "__main__":
        print """
        Some results that you can compare against. They were 10 executions
        of gemm in float64 with matrices of shape 2000x2000 (M=N=K=2000).
-        All memory layout was in c order.
+        All memory layouts were in C order.

        CPU tested: Xeon E5345(2.33Ghz, 8M L2 cache, 1333Mhz FSB),
                    Xeon E5430(2.66Ghz, 12M L2 cache, 1333Mhz FSB),
@@ -138,11 +142,11 @@ if __name__ == "__main__":
                    Xeon X5550(2.67GHz, 8M l2 cache?, hyper-threads enabled)


-        Lib tested:
-            * numpy with ATLAS from distribution(FC9) package (1 thread)
+        Libraries tested:
+            * numpy with ATLAS from distribution (FC9) package (1 thread)
            * manually compiled numpy and ATLAS with 2 threads
-            * goto 1.26 with 1, 2, 4 and 8 threads.
-            * goto2 1.13 compiled with multiple thread enabled.
+            * goto 1.26 with 1, 2, 4 and 8 threads
+            * goto2 1.13 compiled with multiple threads enabled

                          Xeon   Xeon   Xeon  Core2 i7    i7     Xeon   Xeon
        lib/nb threads    E5345  E5430  E5450 E8500 930   950    X5560  X5550
@@ -170,7 +174,7 @@ if __name__ == "__main__":
        goto2 1.13/16                                                     3.16s

        Test time in float32 with cuda 3.0.14
-        (cuda version 3.2RC and up have a faster gemm on the Fermi/GTX[45]??
+        (cuda version 3.2RC and up have a faster gemm on the Fermi/GTX[45]??)

        gpu/cuda version
        GTX580/3.2        0.20s
@@ -198,13 +202,13 @@ if __name__ == "__main__":
    else:
        print
        print "We executed", options.iter,
-        print "call to gemm with a and b matrix of shapes",
+        print "calls to gemm with a and b matrices of shapes",
        print "(%d, %d) and (%d, %d)." % (options.M, options.N,
-                                        options.N, options.K)
+                                          options.N, options.K)

        print
-        print 'Those executions time took %.2fs' % t
+        print 'Total execution time: %.2fs' % t
        print
-        print ('Try to run this script a few times. Experience show that'
-               ' the first time is not as fast as followings call. The'
+        print ('Try to run this script a few times. Experience shows that'
+               ' the first time is not as fast as following calls. The'
               ' difference is not big, but consistent.')