提交 f302a037 作者: Olivier Delalleau

Improved overall presentation

上级 93f2c09c
......@@ -14,6 +14,7 @@ GTX 480 5.83s
import os
import sys
import time
from optparse import OptionParser
import numpy
import theano
......@@ -25,13 +26,13 @@ from theano.gof.python25 import any
def execute(execute=True, verbose=True, M=2000, N=2000, K=2000,
iters=10, order='C'):
"""
:param execute: If True, execute a Theano function that should call gemm
:param verbose: If True, will print some Theano flags and env variable.
:param M,N,K: the M,N,K size used by gemm
:param iters: the number of call to gemm to do
:param execute: If True, execute a Theano function that should call gemm.
:param verbose: If True, will print some Theano flags and env variables.
:param M,N,K: The M,N,K size used by gemm.
:param iters: The number of calls to gemm to do.
:return: a tuple (execution time,
str that represent the implementation used)
str that represents the implementation used)
"""
a = theano.shared(numpy.ones((M, N), dtype=theano.config.floatX,
......@@ -43,20 +44,20 @@ def execute(execute=True, verbose=True, M=2000, N=2000, K=2000,
f = theano.function([], updates={c: 0.4 * c + .8 * T.dot(a, b)})
if verbose:
print 'Some theano flags:'
print 'Some Theano flags:'
print ' blas.ldflags=', theano.config.blas.ldflags
print ' compiledir=', theano.config.compiledir
print ' floatX=', theano.config.floatX
print 'Some env flags:'
print 'Some environment variables:'
print ' MKL_NUM_THREADS=', os.getenv('MKL_NUM_THREADS')
print ' OMP_NUM_THREADS=', os.getenv('OMP_NUM_THREADS')
print ' GOTO_NUM_THREADS=', os.getenv('GOTO_NUM_THREADS')
print
print ('Numpy config: (used when the theano flags'
print ('Numpy config: (used when the Theano flag'
' "blas.ldflags" is empty)')
numpy.show_config()
print 'Numpy dot module:', numpy.dot.__module__
print 'Numpy file location that was loaded:', numpy.__file__
print 'Numpy location:', numpy.__file__
print 'Numpy version:', numpy.__version__
print
t0 = 0
......@@ -69,8 +70,8 @@ def execute(execute=True, verbose=True, M=2000, N=2000, K=2000,
f.maker.env.toposort()]):
impl = 'Used the gpu'
else:
impl = 'ERROR, not able to tell if theano used the cpu or the gpu'
impl += f.maker.env.toposort()
impl = 'ERROR, unable to tell if Theano used the cpu or the gpu:\n'
impl += str(f.maker.env.toposort())
if execute:
t0 = time.time()
......@@ -88,15 +89,18 @@ def jobman_job(state, channel):
def test():
execute()
from optparse import OptionParser
parser = OptionParser(usage='%prog <options>')
parser = OptionParser(
usage='%prog <options>\nCompute time needed to perform BLAS gemm '
'computations between matrices of size (M, N) and (N, K).')
parser.add_option('-q', '--quiet', action='store_true', dest='quiet',
default=False,
help="If true, don't print the comparison table")
help="If true, do not print the comparison table and config "
"options")
parser.add_option('--print_only', action='store_true', dest='print_only',
default=False,
help="If true, don't do the gemm call")
help="If true, do not perform gemm computations")
parser.add_option('-M', '--M', action='store', dest='M',
default=2000, type="int",
help="The M size to gemm")
......@@ -108,13 +112,13 @@ parser.add_option('-K', '--K', action='store', dest='K',
help="The K size to gemm")
parser.add_option('--iter', action='store', dest='iter',
default=10, type="int",
help="The number of call to gemm")
help="The number of calls to gemm")
parser.add_option('--order', action='store', dest='order',
default="C",
help="The numpy order parameter used when creating the"
" numpy.ndarray object. It accept 'C' for the c memory"
" layout order and 'F' for the fortran order of all"
" matrix.")
help="The numpy memory layout parameter used when creating"
" the numpy.ndarray objects. It accepts 'C' for C memory"
" order and 'F' for Fortran order (for all matrices).")
if __name__ == "__main__":
options, arguments = parser.parse_args(sys.argv)
......@@ -127,7 +131,7 @@ if __name__ == "__main__":
print """
Some results that you can compare against. They were 10 executions
of gemm in float64 with matrices of shape 2000x2000 (M=N=K=2000).
All memory layout was in c order.
All memory layout was in C order.
CPU tested: Xeon E5345(2.33Ghz, 8M L2 cache, 1333Mhz FSB),
Xeon E5430(2.66Ghz, 12M L2 cache, 1333Mhz FSB),
......@@ -138,11 +142,11 @@ if __name__ == "__main__":
Xeon X5550(2.67GHz, 8M l2 cache?, hyper-threads enabled)
Lib tested:
* numpy with ATLAS from distribution(FC9) package (1 thread)
Libraries tested:
* numpy with ATLAS from distribution (FC9) package (1 thread)
* manually compiled numpy and ATLAS with 2 threads
* goto 1.26 with 1, 2, 4 and 8 threads.
* goto2 1.13 compiled with multiple thread enabled.
* goto 1.26 with 1, 2, 4 and 8 threads
* goto2 1.13 compiled with multiple threads enabled
Xeon Xeon Xeon Core2 i7 i7 Xeon Xeon
lib/nb threads E5345 E5430 E5450 E8500 930 950 X5560 X5550
......@@ -170,7 +174,7 @@ if __name__ == "__main__":
goto2 1.13/16 3.16s
Test time in float32 with cuda 3.0.14
(cuda version 3.2RC and up have a faster gemm on the Fermi/GTX[45]??
(cuda version 3.2RC and up have a faster gemm on the Fermi/GTX[45]??)
gpu/cuda version
GTX580/3.2 0.20s
......@@ -198,13 +202,13 @@ if __name__ == "__main__":
else:
print
print "We executed", options.iter,
print "call to gemm with a and b matrix of shapes",
print "calls to gemm with a and b matrices of shapes",
print "(%d, %d) and (%d, %d)." % (options.M, options.N,
options.N, options.K)
options.N, options.K)
print
print 'Those executions time took %.2fs' % t
print 'Total execution time: %.2fs' % t
print
print ('Try to run this script a few times. Experience show that'
' the first time is not as fast as followings call. The'
print ('Try to run this script a few times. Experience shows that'
' the first time is not as fast as followings calls. The'
' difference is not big, but consistent.')
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论