提交 598a531f 作者: nouiz

Merge pull request #12 from delallea/check_blas_cosmetic

Improved overall presentation
...@@ -14,6 +14,7 @@ GTX 480 5.83s ...@@ -14,6 +14,7 @@ GTX 480 5.83s
import os import os
import sys import sys
import time import time
from optparse import OptionParser
import numpy import numpy
import theano import theano
...@@ -25,13 +26,13 @@ from theano.gof.python25 import any ...@@ -25,13 +26,13 @@ from theano.gof.python25 import any
def execute(execute=True, verbose=True, M=2000, N=2000, K=2000, def execute(execute=True, verbose=True, M=2000, N=2000, K=2000,
iters=10, order='C'): iters=10, order='C'):
""" """
:param execute: If True, execute a Theano function that should call gemm :param execute: If True, execute a Theano function that should call gemm.
:param verbose: If True, will print some Theano flags and env variable. :param verbose: If True, will print some Theano flags and env variables.
:param M,N,K: the M,N,K size used by gemm :param M,N,K: The M,N,K size used by gemm.
:param iters: the number of call to gemm to do :param iters: The number of calls to gemm to do.
:return: a tuple (execution time, :return: a tuple (execution time,
str that represent the implementation used) str that represents the implementation used)
""" """
a = theano.shared(numpy.ones((M, N), dtype=theano.config.floatX, a = theano.shared(numpy.ones((M, N), dtype=theano.config.floatX,
...@@ -43,20 +44,20 @@ def execute(execute=True, verbose=True, M=2000, N=2000, K=2000, ...@@ -43,20 +44,20 @@ def execute(execute=True, verbose=True, M=2000, N=2000, K=2000,
f = theano.function([], updates={c: 0.4 * c + .8 * T.dot(a, b)}) f = theano.function([], updates={c: 0.4 * c + .8 * T.dot(a, b)})
if verbose: if verbose:
print 'Some theano flags:' print 'Some Theano flags:'
print ' blas.ldflags=', theano.config.blas.ldflags print ' blas.ldflags=', theano.config.blas.ldflags
print ' compiledir=', theano.config.compiledir print ' compiledir=', theano.config.compiledir
print ' floatX=', theano.config.floatX print ' floatX=', theano.config.floatX
print 'Some env flags:' print 'Some environment variables:'
print ' MKL_NUM_THREADS=', os.getenv('MKL_NUM_THREADS') print ' MKL_NUM_THREADS=', os.getenv('MKL_NUM_THREADS')
print ' OMP_NUM_THREADS=', os.getenv('OMP_NUM_THREADS') print ' OMP_NUM_THREADS=', os.getenv('OMP_NUM_THREADS')
print ' GOTO_NUM_THREADS=', os.getenv('GOTO_NUM_THREADS') print ' GOTO_NUM_THREADS=', os.getenv('GOTO_NUM_THREADS')
print print
print ('Numpy config: (used when the theano flags' print ('Numpy config: (used when the Theano flag'
' "blas.ldflags" is empty)') ' "blas.ldflags" is empty)')
numpy.show_config() numpy.show_config()
print 'Numpy dot module:', numpy.dot.__module__ print 'Numpy dot module:', numpy.dot.__module__
print 'Numpy file location that was loaded:', numpy.__file__ print 'Numpy location:', numpy.__file__
print 'Numpy version:', numpy.__version__ print 'Numpy version:', numpy.__version__
print print
t0 = 0 t0 = 0
...@@ -69,8 +70,8 @@ def execute(execute=True, verbose=True, M=2000, N=2000, K=2000, ...@@ -69,8 +70,8 @@ def execute(execute=True, verbose=True, M=2000, N=2000, K=2000,
f.maker.env.toposort()]): f.maker.env.toposort()]):
impl = 'Used the gpu' impl = 'Used the gpu'
else: else:
impl = 'ERROR, not able to tell if theano used the cpu or the gpu' impl = 'ERROR, unable to tell if Theano used the cpu or the gpu:\n'
impl += f.maker.env.toposort() impl += str(f.maker.env.toposort())
if execute: if execute:
t0 = time.time() t0 = time.time()
...@@ -88,15 +89,18 @@ def jobman_job(state, channel): ...@@ -88,15 +89,18 @@ def jobman_job(state, channel):
def test(): def test():
execute() execute()
from optparse import OptionParser
parser = OptionParser(usage='%prog <options>') parser = OptionParser(
usage='%prog <options>\nCompute time needed to perform BLAS gemm '
'computations between matrices of size (M, N) and (N, K).')
parser.add_option('-q', '--quiet', action='store_true', dest='quiet', parser.add_option('-q', '--quiet', action='store_true', dest='quiet',
default=False, default=False,
help="If true, don't print the comparison table") help="If true, do not print the comparison table and config "
"options")
parser.add_option('--print_only', action='store_true', dest='print_only', parser.add_option('--print_only', action='store_true', dest='print_only',
default=False, default=False,
help="If true, don't do the gemm call") help="If true, do not perform gemm computations")
parser.add_option('-M', '--M', action='store', dest='M', parser.add_option('-M', '--M', action='store', dest='M',
default=2000, type="int", default=2000, type="int",
help="The M size to gemm") help="The M size to gemm")
...@@ -108,13 +112,13 @@ parser.add_option('-K', '--K', action='store', dest='K', ...@@ -108,13 +112,13 @@ parser.add_option('-K', '--K', action='store', dest='K',
help="The K size to gemm") help="The K size to gemm")
parser.add_option('--iter', action='store', dest='iter', parser.add_option('--iter', action='store', dest='iter',
default=10, type="int", default=10, type="int",
help="The number of call to gemm") help="The number of calls to gemm")
parser.add_option('--order', action='store', dest='order', parser.add_option('--order', action='store', dest='order',
default="C", default="C",
help="The numpy order parameter used when creating the" help="The numpy memory layout parameter used when creating"
" numpy.ndarray object. It accept 'C' for the c memory" " the numpy.ndarray objects. It accepts 'C' for C memory"
" layout order and 'F' for the fortran order of all" " order and 'F' for Fortran order (for all matrices).")
" matrix.")
if __name__ == "__main__": if __name__ == "__main__":
options, arguments = parser.parse_args(sys.argv) options, arguments = parser.parse_args(sys.argv)
...@@ -127,7 +131,7 @@ if __name__ == "__main__": ...@@ -127,7 +131,7 @@ if __name__ == "__main__":
print """ print """
Some results that you can compare against. They were 10 executions Some results that you can compare against. They were 10 executions
of gemm in float64 with matrices of shape 2000x2000 (M=N=K=2000). of gemm in float64 with matrices of shape 2000x2000 (M=N=K=2000).
All memory layout was in c order. All memory layout was in C order.
CPU tested: Xeon E5345(2.33Ghz, 8M L2 cache, 1333Mhz FSB), CPU tested: Xeon E5345(2.33Ghz, 8M L2 cache, 1333Mhz FSB),
Xeon E5430(2.66Ghz, 12M L2 cache, 1333Mhz FSB), Xeon E5430(2.66Ghz, 12M L2 cache, 1333Mhz FSB),
...@@ -138,11 +142,11 @@ if __name__ == "__main__": ...@@ -138,11 +142,11 @@ if __name__ == "__main__":
Xeon X5550(2.67GHz, 8M l2 cache?, hyper-threads enabled) Xeon X5550(2.67GHz, 8M l2 cache?, hyper-threads enabled)
Lib tested: Libraries tested:
* numpy with ATLAS from distribution(FC9) package (1 thread) * numpy with ATLAS from distribution (FC9) package (1 thread)
* manually compiled numpy and ATLAS with 2 threads * manually compiled numpy and ATLAS with 2 threads
* goto 1.26 with 1, 2, 4 and 8 threads. * goto 1.26 with 1, 2, 4 and 8 threads
* goto2 1.13 compiled with multiple thread enabled. * goto2 1.13 compiled with multiple threads enabled
Xeon Xeon Xeon Core2 i7 i7 Xeon Xeon Xeon Xeon Xeon Core2 i7 i7 Xeon Xeon
lib/nb threads E5345 E5430 E5450 E8500 930 950 X5560 X5550 lib/nb threads E5345 E5430 E5450 E8500 930 950 X5560 X5550
...@@ -170,7 +174,7 @@ if __name__ == "__main__": ...@@ -170,7 +174,7 @@ if __name__ == "__main__":
goto2 1.13/16 3.16s goto2 1.13/16 3.16s
Test time in float32 with cuda 3.0.14 Test time in float32 with cuda 3.0.14
(cuda version 3.2RC and up have a faster gemm on the Fermi/GTX[45]?? (cuda version 3.2RC and up have a faster gemm on the Fermi/GTX[45]??)
gpu/cuda version gpu/cuda version
GTX580/3.2 0.20s GTX580/3.2 0.20s
...@@ -198,13 +202,13 @@ if __name__ == "__main__": ...@@ -198,13 +202,13 @@ if __name__ == "__main__":
else: else:
print print
print "We executed", options.iter, print "We executed", options.iter,
print "call to gemm with a and b matrix of shapes", print "calls to gemm with a and b matrices of shapes",
print "(%d, %d) and (%d, %d)." % (options.M, options.N, print "(%d, %d) and (%d, %d)." % (options.M, options.N,
options.N, options.K) options.N, options.K)
print print
print 'Those executions time took %.2fs' % t print 'Total execution time: %.2fs' % t
print print
print ('Try to run this script a few times. Experience show that' print ('Try to run this script a few times. Experience shows that'
' the first time is not as fast as followings call. The' ' the first time is not as fast as followings calls. The'
' difference is not big, but consistent.') ' difference is not big, but consistent.')
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论