提交 2964c26d authored 作者: Frederic Bastien's avatar Frederic Bastien

Added a meta script to launch many BLAS comparisons with one command.

上级 ae2dafef
...@@ -10,20 +10,23 @@ GTX 470 7.22s ...@@ -10,20 +10,23 @@ GTX 470 7.22s
GTX 285, 6.84s GTX 285, 6.84s
GTX 480 5.83s GTX 480 5.83s
""" """
import sys
import theano,numpy,time import theano,numpy,time
import theano.tensor as T import theano.tensor as T
shapes=(2000,2000) shapes=(2000,2000)
iters = 10 iters = 10
def execute(verbose=True): def execute(execute=True, verbose=True):
a=theano.shared(numpy.ones(shapes, dtype=theano.config.floatX)) a=theano.shared(numpy.ones(shapes, dtype=theano.config.floatX))
b=theano.shared(numpy.ones(shapes, dtype=theano.config.floatX)) b=theano.shared(numpy.ones(shapes, dtype=theano.config.floatX))
c=theano.shared(numpy.ones(shapes, dtype=theano.config.floatX)) c=theano.shared(numpy.ones(shapes, dtype=theano.config.floatX))
f=theano.function([],updates={c:0.4*c+.8*T.dot(a,b)}) f=theano.function([],updates={c:0.4*c+.8*T.dot(a,b)})
if verbose: if verbose:
print 'Some theano flags:' print 'Some theano flags:'
print ' blas.ldflags=',theano.config.blas.ldflags print ' blas.ldflags=',theano.config.blas.ldflags
...@@ -43,12 +46,15 @@ def execute(verbose=True): ...@@ -43,12 +46,15 @@ def execute(verbose=True):
else: else:
print 'ERROR, not able to tell if theano used the cpu or the gpu' print 'ERROR, not able to tell if theano used the cpu or the gpu'
print f.maker.env.toposort() print f.maker.env.toposort()
t0=0
t1=-1
if execute:
t0=time.time() t0=time.time()
for i in range(iters): for i in range(iters):
f() f()
t1=time.time() t1=time.time()
if verbose: if verbose and execute:
print print
print 'this execution time took %.2fs'%(t1-t0) print 'this execution time took %.2fs'%(t1-t0)
return t1-t0 return t1-t0
...@@ -63,26 +69,38 @@ def test(): ...@@ -63,26 +69,38 @@ def test():
if __name__ == "__main__": if __name__ == "__main__":
execute() verbose = True
print_only = False
if '--quiet' in sys.argv:
verbose = False
if '--print_only' in sys.argv:
print_only = True
t = execute(not print_only, verbose)
if verbose:
print """ print """
Some result that you can compare again. They where 10 executions of gemm in float64 with matrix of shape 2000x2000 on FC9. Some result that you can compare again. They where 10 executions of gemm in float64 with matrix of shape 2000x2000 on FC9.
We tested 3 cpus: Xeon E5345, Xeon E5430 and Xeon E5450 Cpu tested: Xeon E5345, Xeon E5430, Xeon E5450, Core 2 E8500, Core i7 930(hyper-threads enabled)
Lib tested: Lib tested:
* numpy with ATLAS from distribution(FC9) package (1 thread) * numpy with ATLAS from distribution(FC9) package (1 thread)
* manually compiled numpy and ATLAS with 2 threads * manually compiled numpy and ATLAS with 2 threads
* goto with 1, 2, 4 and 8 threads. * goto with 1, 2, 4 and 8 threads.
Xeon Xeno Xeon Core2 i7
lib/nb threads E5345(s) E5430(s) E5450(s) lib/nb threads E5345 E5430 E5450 E8500 930
numpy_FC9_atlas/1 39.2s 35.0s 30.7s numpy_FC9_atlas/1 39.2s 35.0s 30.7s 29.6s 21.5s
goto/1 18.7s 16.1s 14.2s goto/1 18.7s 16.1s 14.2s 13.7s 16.1s
numpy_MAN_atlas/2 12.0s 11.6s 10.2s numpy_MAN_atlas/2 12.0s 11.6s 10.2s 9.2s 9.0s
goto/2 9.5s 8.1s 7.1s goto/2 9.5s 8.1s 7.1s 7.3s 8.1s
goto/4 4.9s 4.4s 3.7s goto/4 4.9s 4.4s 3.7s - 4.1s
goto/8 2.7s 2.4s 2.0s goto/8 2.7s 2.4s 2.0s - 4.1s
""" """
print print
print "We timed",iters,"executions of gemm with matrix of shapes",shapes print "We timed",iters,"executions of gemm with matrix of shapes",shapes
else:
print t
#!/bin/bash
# Meta script: run misc/check_blas.py under several BLAS/threading setups
# and print the reported timings side by side.
# Must be launched from the directory that contains misc/.

# First invocation uses --print_only, so the benchmark loop itself is skipped.
python misc/check_blas.py --print_only

# Describe the machine the timings were taken on.
grep "model name" /proc/cpuinfo | uniq
grep processor /proc/cpuinfo
free
uname -a

# Each run captures what check_blas.py writes to stdout in --quiet mode.
# The report produced by `time` goes to stderr, so it is shown on the
# terminal but is not part of the captured value.
# NOTE(review): an empty blas.ldflags presumably makes Theano fall back to
# numpy's gemm (hence the "numpy gemm" label below) - confirm.
numpy_gemm=$(THEANO_FLAGS=blas.ldflags= OMP_NUM_THREADS=1 time python misc/check_blas.py --quiet)
theano_gemm_1=$(OMP_NUM_THREADS=1 time python misc/check_blas.py --quiet)
theano_gemm_2=$(OMP_NUM_THREADS=2 time python misc/check_blas.py --quiet)
theano_gemm_4=$(OMP_NUM_THREADS=4 time python misc/check_blas.py --quiet)
theano_gemm_8=$(OMP_NUM_THREADS=8 time python misc/check_blas.py --quiet)

# Summary, printed only after every run has finished.
echo "numpy gemm took: $numpy_gemm"
echo "theano gemm 1 thread took: $theano_gemm_1"
echo "theano gemm 2 thread took: $theano_gemm_2"
echo "theano gemm 4 thread took: $theano_gemm_4"
echo "theano gemm 8 thread took: $theano_gemm_8"

# Note from Fred, to test the distribution's numpy at LISA:
#   PYTHONPATH=/u/bastienf/repos:/usr/lib64/python2.5/site-packages THEANO_FLAGS=blas.ldflags= OMP_NUM_THREADS=8 time python misc/check_blas.py
\ No newline at end of file
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论