提交 fba3e888，作者：Frédéric Bastien，提交者：GitHub

Merge pull request #5609 from nouiz/cleanup

Remove the old benchmark directory.
The code written by James Bergstra in this directory has been superseded
by the scipy benchmarking code, which is in another repository:
https://github.com/jaberg/DeepLearningBenchmarks
The remainder of the code in this directory remains undocumented.
# Build the GSL-based autoencoder benchmark.  See the header comment of
# aa.cc for alternative link lines (gslcblas, goto BLAS, ...).
# PUB_PREFIX must point at the prefix where GSL is installed;
# THEANO_BLAS_LDFLAGS supplies the BLAS link flags Theano itself uses.
aa.x : aa.cc
	g++ -O3 -ffast-math aa.cc -o aa.x -L${PUB_PREFIX}/lib -lgsl ${THEANO_BLAS_LDFLAGS}

# Remove the built benchmark binary.
clean :
	rm aa.x
/*
*
* g++ -O2 -ffast-math -I$PUB_PREFIX/include aa.cc -o aa.x -lgsl -lgslcblas
*
* g++ -O2 -ffast-math -I$PUB_PREFIX/include aa.cc -o aa.x -L$PUB_PREFIX/lib -lgsl -lcblas -lgoto -lgfortran
*
* ./aa.x 10 5 7 1000
*
* */
#include <cassert>
#include <cstdlib>
#include <cstdio>
#include <cmath>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_blas.h>
#include <time.h>
#include <sys/time.h>
/* Convert a struct timeval into a Python-style timestamp: whole seconds
 * plus the microsecond part expressed as a fraction, as a double. */
double pytime(const struct timeval * tv)
{
    double seconds = (double) tv->tv_sec;
    double micros = (double) tv->tv_usec;
    return seconds + micros / 1000000.0;
}
/*
 * Benchmark: one gradient-descent training run of a tied-weight autoencoder
 *   x -> hid = tanh(x*w + a) -> out = tanh(hid*w^T + b)
 * using GSL BLAS, reporting total time and the fraction spent inside dgemm.
 *
 * argv: <neg> <nout> <nhid> <niter>
 *   neg   - number of examples (rows of x)
 *   nout  - visible width; input width == output width (nin == nout)
 *   nhid  - hidden layer width
 *   niter - number of training iterations
 */
int main(int argc, char **argv)
{
    assert(argc == 5);
    int neg = strtol(argv[1], 0, 0);
    int nout = strtol(argv[2], 0, 0);
    int nin = nout;  /* autoencoder reconstructs its input, so nin == nout */
    int nhid = strtol(argv[3], 0, 0);
    int niter = strtol(argv[4], 0, 0);
    double lr = 0.01;  /* learning rate */

    /* Fixed seed so runs are comparable with the Python ports of this code. */
    gsl_rng * rng = gsl_rng_alloc (gsl_rng_taus);
    gsl_rng_set(rng, 234);

    /* Parameters and per-iteration temporaries. */
    gsl_matrix * x = gsl_matrix_alloc(neg, nin);        /* input == training target */
    gsl_matrix * w = gsl_matrix_alloc(nin, nhid);       /* tied encode/decode weights */
    gsl_vector * a = gsl_vector_alloc(nhid);            /* hidden bias */
    gsl_vector * b = gsl_vector_alloc(nout);            /* output bias */
    gsl_matrix * xw = gsl_matrix_alloc(neg, nhid);      /* x * w */
    gsl_matrix * hid = gsl_matrix_alloc(neg, nhid);     /* tanh(x*w + a) */
    gsl_matrix * hidwt = gsl_matrix_alloc(neg, nout);   /* hid * w^T */
    gsl_matrix * g_hidwt = gsl_matrix_alloc(neg, nout); /* gradient wrt hid*w^T */
    gsl_matrix * g_hid = gsl_matrix_alloc(neg, nhid);   /* gradient wrt hid */
    gsl_matrix * g_w = gsl_matrix_alloc(nout, nhid);    /* decoder-side gradient of w */
    gsl_vector * g_b = gsl_vector_alloc(nout);          /* gradient wrt b */

    /* neg*nout == neg*nin here, so this fills all of x. */
    for (int i = 0; i < neg*nout; ++i) x->data[i] = (gsl_rng_uniform(rng) -0.5)*1.5;
    for (int i = 0; i < nout*nhid; ++i) w->data[i] = gsl_rng_uniform(rng);
    for (int i = 0; i < nhid; ++i) a->data[i] = 0.0;
    for (int i = 0; i < nout; ++i) b->data[i] = 0.0;

    struct timeval tv0, tv1;      /* whole-run timer */
    struct timeval tdot0, tdot1;  /* per-dgemm timer */
    double time_of_dot = 0.0;     /* accumulated seconds spent inside dgemm */
    gettimeofday(&tv0, 0);
    double err = 0.0;             /* sum of squared reconstruction errors, last iter */
    for (int iter = 0; iter < niter; ++iter)
    {
        /* Forward, encoder matmul: xw = x * w (timed). */
        gettimeofday(&tdot0, 0);
        gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, x, w, 0.0, xw);
        gettimeofday(&tdot1, 0);
        time_of_dot += pytime(&tdot1) - pytime(&tdot0);
        /* hid = tanh(xw + a) */
        for (int i = 0; i < neg; ++i)
            for (int j = 0; j < nhid; ++j)
            {
                double act = xw->data[i*nhid+j] + a->data[j];
                hid->data[i*nhid+j] = tanh(act);
            }
        /* Forward, decoder matmul with the tied weights: hidwt = hid * w^T (timed). */
        gettimeofday(&tdot0, 0);
        gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, hid, w, 0.0, hidwt);
        gettimeofday(&tdot1, 0);
        time_of_dot += pytime(&tdot1) - pytime(&tdot0);
        /* Output layer plus squared-error backprop through tanh. */
        for (int i = 0; i < nout; ++i) g_b->data[i] = 0.0;
        err = 0.0;
        for (int i = 0; i < neg; ++i)
            for (int j = 0; j < nout; ++j)
            {
                double act = hidwt->data[i*nout+j] + b->data[j];
                double out = tanh(act);
                double g_out = out - x->data[i*nout+j];  /* target is the input itself */
                err += g_out * g_out;
                g_hidwt->data[i*nout+j] = g_out * (1.0 - out*out);  /* tanh derivative */
                g_b->data[j] += g_hidwt->data[i*nout+j];
            }
        for (int i = 0; i < nout; ++i) b->data[i] -= lr * g_b->data[i];
        if (1)
        {
            /* Backprop into the hidden layer and both uses of the tied w (timed). */
            gettimeofday(&tdot0, 0);
            gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, g_hidwt, w, 0.0, g_hid);
            gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, g_hidwt, hid, 0.0, g_w);
            gettimeofday(&tdot1, 0);
            time_of_dot += pytime(&tdot1) - pytime(&tdot0);
            /* Apply tanh derivative to g_hid; update the hidden bias on the fly. */
            for (int i = 0; i < neg; ++i)
                for (int j = 0; j < nhid; ++j)
                {
                    g_hid->data[i*nhid+j] *= (1.0 - hid->data[i*nhid+j] * hid->data[i*nhid+j]);
                    a->data[j] -= lr * g_hid->data[i*nhid+j];
                }
            /* Encoder-side contribution folded directly into w: w += -lr * x^T * g_hid (timed). */
            gettimeofday(&tdot0, 0);
            gsl_blas_dgemm(CblasTrans, CblasNoTrans, -lr, x, g_hid, 1.0, w);
            gettimeofday(&tdot1, 0);
            time_of_dot += pytime(&tdot1) - pytime(&tdot0);
            /* Decoder-side contribution (g_w computed above). */
            for (int i = 0; i < nout*nhid; ++i) w->data[i] -= lr * g_w->data[i];
        }
    }
    gettimeofday(&tv1, 0);
    double total_time = pytime(&tv1) - pytime(&tv0);
    fprintf(stdout, "took = %lfs to get err %lf\n", total_time, 0.5 * err);
    fprintf(stdout, "... of which %.2lfs was spent in dgemm (fraction: %.2lf)\n", time_of_dot, time_of_dot / total_time);
    //skip freeing: the process exits immediately, the OS reclaims everything
    return 0;
}
#!/usr/bin/env python
from __future__ import absolute_import, print_function, division
import numpy as np
import sys
import time
import theano
import theano.tensor as T
import theano.sandbox
from six.moves import xrange
from theano.compile import module, Mode
from theano import gof, Op, Apply
from theano.tensor import blas, opt
# numpy: aa_numpy.py
# c : aa.cc
# Dead experimental code (guarded by `if 0:`): a hand-rolled graph-optimizer
# pipeline that predates Theano's built-in 'fast_run' optimizer.  Kept for
# reference only; it is never executed.
# NOTE(review): the original indentation was lost in extraction; the nesting
# below is reconstructed from the syntax — confirm against upstream history.
if 0:
    class Opt(object):
        """Ad-hoc optimizer pipeline: merge duplicate subgraphs, rewrite
        dot-product patterns into GEMM calls, and strip algebraic no-ops.

        NOTE(review): relies on long-removed Theano internals
        (theano.gof.TopoOptimizer, T.sub_inplace, T.gemm, ...); it cannot run
        on any modern Theano.
        """
        # Merge duplicated subgraphs in the computation graph.
        merge = theano.gof.MergeOptimizer()
        # First GEMM rewrite, pattern defined elsewhere in old Theano.
        gemm_opt_1 = theano.gof.TopoOptimizer(theano.tensor_opt.gemm_pattern_1)
        # Second GEMM rewrite: d -= a * (dot(b, c) + transpose(dot(f, g)))
        # becomes two nested gemm calls with coefficient -a.
        gemm_opt_2 = theano.gof.TopoOptimizer( # d -= a * (dot()+transpose(dot))
            theano.gof.PatternSub(
                (
                    T.sub_inplace,
                    'd',
                    (
                        T.mul,
                        dict(pattern = (T.DimShuffle((), ['x', 'x'], inplace = True), 'a'),
                             allow_multiple_clients = True),
                        (
                            T.add,
                            (T.dot, 'b', 'c'),
                            (T.transpose_inplace, (T.dot, 'f', 'g'))
                        )
                    )
                ),
                (
                    T.gemm,
                    (
                        T.gemm,
                        'd',
                        (T.neg, 'a'),
                        (T.transpose_inplace, 'g'),
                        (T.transpose_inplace, 'f'),
                        T.constant(1.0)
                    ),
                    (T.neg, 'a'),
                    'b',
                    'c',
                    T.constant(1.0)
                ),
                allow_multiple_clients = False))

        # Rewrites that canonicalize squaring: x*x -> sqr(x) and x**2 -> sqr(x).
        sqr = []
        sqr.append( theano.gof.TopoOptimizer(
            theano.gof.PatternSub(
                (T.mul,'x', 'x'),
                (T.sqr, 'x'), allow_multiple_clients=True)))
        sqr.append(theano.gof.TopoOptimizer(
            theano.gof.PatternSub(
                (T.pow, 'x', (T.DimShuffle((), ['x', 'x'], inplace=True), T.constant(2))),
                (T.sqr, 'x'), allow_multiple_clients=True)))

        # Identity-elimination rewrites (each one removes a no-op pattern).
        ident_opt_list = []
        ident_opt_list.append( # remove explicit copies
            theano.gof.TopoOptimizer(
                theano.gof.PatternSub(
                    (T.tensor_copy, 'x'),
                    'x',
                    allow_multiple_clients=True)))
        ident_opt_list.append( # remove double-transpose
            theano.gof.TopoOptimizer(
                theano.gof.PatternSub(
                    (T.transpose_inplace, (T.transpose_inplace, 'x')),
                    'x',
                    allow_multiple_clients=True)))
        ident_opt_list.append(  # sqr(sqrt(x)) -> x
            theano.gof.TopoOptimizer(
                theano.gof.PatternSub(
                    (T.sqr, (T.sqrt,'x')),
                    'x',
                    allow_multiple_clients=True)))
        ident_opt_list.append(  # sqrt(sqr(x)) -> x
            theano.gof.TopoOptimizer(
                theano.gof.PatternSub(
                    (T.sqrt, (T.sqr,'x')),
                    'x',
                    allow_multiple_clients=True)))
        ident_opt_list.append(  # x * (y / x) -> y
            theano.gof.TopoOptimizer(
                theano.gof.PatternSub(
                    (T.mul, 'x', (T.div,'y', 'x')),
                    'y',
                    allow_multiple_clients=True)))
        ident_opt_list.append(  # (y / x) * x -> y
            theano.gof.TopoOptimizer(
                theano.gof.PatternSub(
                    (T.mul, (T.div,'y', 'x'), 'x'),
                    'y',
                    allow_multiple_clients=True)))
        ident_opt_list.append(  # (y * x) / x -> y
            theano.gof.TopoOptimizer(
                theano.gof.PatternSub(
                    (T.div, (T.mul,'y', 'x'), 'x'),
                    'y',
                    allow_multiple_clients=True)))
        ident_opt_list.append(  # (y * x) / y -> x
            theano.gof.TopoOptimizer(
                theano.gof.PatternSub(
                    (T.div, (T.mul,'y', 'x'), 'y'),
                    'x',
                    allow_multiple_clients=True)))

        def __call__(self, env):
            """Run the full pipeline over `env`: merge, simplify, GEMM-ify, merge."""
            self.merge(env)
            #eliminate identities
            if 0:
                print('SKIPPING optimizations')
            else:
                for opt in self.ident_opt_list:
                    opt(env)
                for opt in self.sqr:
                    opt(env)
                self.gemm_opt_1(env)
                self.gemm_opt_2(env)
            # Final merge pass after the rewrites above expose new duplicates.
            self.merge(env)
def print_graph_linker(print_prog=True):
    """Build a linker that prints every node of the program as it executes.

    When `print_prog` is true, each executed node is printed together with the
    execution indices of the nodes that produced its inputs ('-' for graph
    inputs).  NOTE(review): original indentation was lost in extraction; the
    grouping of the disabled debug prints under `if False` is reconstructed —
    confirm against upstream history.
    """
    if 1:
        # Map from node -> its execution index (None maps to '-' for inputs).
        imap = {None:'-'}
        def blah(i, node, thunk):
            # Per-node callback: record this node's index, optionally print it.
            imap[node] = str(i)
            if print_prog:# and node.op.__class__ is T.DimShuffle:
                if False and node.op == T.DimShuffle((), ['x', 'x'], inplace = True):
                    # Disabled DimShuffle-specific debug output.
                    print(node.op == T.DimShuffle((), ['x', 'x'],
                          inplace=True), end=' ')
                    print(node.inputs[0], type(node.inputs[0]), end=' ')
                    print(node.inputs[0].equals(T.constant(2)), end=' ')
                outputs = node.outputs
                inputs = theano.gof.graph.inputs(outputs)
                print('node ', i, node, end=' ')
                # Indices of the producers of each input, colon-separated.
                print(':'.join([imap[inp.owner] for inp in node.inputs]))
                #print theano.sandbox.pprint.pp.process_graph(inputs, outputs)
        return theano.sandbox.wraplinker.WrapLinkerMany(
            [theano.gof.OpWiseCLinker()],
            [theano.sandbox.wraplinker.run_all
            ,blah
            #,theano.sandbox.wraplinker.numpy_notall_isfinite
            ])
    else:
        # Silent alternative: plain op-wise C linker, no tracing.
        return theano.gof.OpWiseCLinker()
class M(module.Module):
    """Tied-weight autoencoder module (old theano.compile.module API).

    Mirrors aa.cc / aa_numpy.py: hid = tanh(x*w + a), out = tanh(hid*w^T + b),
    cost = 0.5 * sum((out - x)**2), trained with fixed-step gradient descent.
    """
    def __init__(self):
        super(M, self).__init__()
        x = T.matrix('x') # input, target
        self.w = module.Member(T.matrix('w')) # weights
        self.a = module.Member(T.vector('a')) # hid bias
        self.b = module.Member(T.vector('b')) # output bias
        # Encoder; the decoder below reuses w transposed (tied weights).
        self.hid = T.tanh(T.dot(x, self.w) + self.a)
        hid = self.hid
        self.out = T.tanh(T.dot(hid, self.w.T) + self.b)
        out = self.out
        # Squared reconstruction error against the input itself.
        self.err = 0.5 * T.sum((out - x)**2)
        err = self.err
        params = [self.w, self.a, self.b]
        gparams = T.grad(err, params)
        # One SGD step with a hard-coded learning rate of 0.01.
        updates = [(p, p - 0.01 * gp) for p, gp in zip(params, gparams)]
        # step(x) returns the error and applies the parameter updates.
        self.step = module.Method([x], err, updates=dict(updates))
# Build, compile and benchmark the autoencoder module.
# Usage: aa.py <neg> <nout> <nhid> <niter>   (matches aa.cc / aa_numpy.py)
mod = M()
# The original file toggled between several modes; only the last one was
# live, so the dead reassignments were removed.  Alternatives tried:
#   mode = 'FAST_RUN'
#   mode = Mode(optimizer='fast_run',
#               linker=theano.gof.OpWiseCLinker(nice_errors=True))
#   mode = Mode(optimizer='fast_run', linker='c')
mode = Mode(optimizer='fast_run', linker='c|py')
print(mod.pretty(mode=mode))
m = mod.make(mode=mode)
neg, nout, nhid, niter = [int(a) for a in sys.argv[1:]]
rng = np.random.RandomState(342)
m.w = rng.rand(nout, nhid)
# Biases start at zero (randn * 0.0 keeps the RNG stream identical to the
# historical runs).
m.a = rng.randn(nhid) * 0.0
m.b = rng.randn(nout) * 0.0
x = (rng.rand(neg, nout)-0.5) * 1.5
t = time.time()
for i in xrange(niter):
    err = m.step(x)
print('time: ',time.time() - t, 'err: ', err)
try:
    # Best-effort: only some Mode implementations provide print_summary().
    # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit are not
    # swallowed.
    mode.print_summary()
except Exception:
    pass
#!/usr/bin/env python
from __future__ import absolute_import, print_function, division
import numpy as np
import sys
import time
from six.moves import xrange
# c: aa.cc
# NumPy reference implementation of the tied-weight autoencoder benchmark
# (mirrors aa.cc).  Usage: <neg> <nout> <nhid> <niter>; prints total time,
# final error, and the fraction of time spent inside np.dot.
neg, nout, nhid, niter = [int(arg) for arg in sys.argv[1:]]
lr = 0.01
rng = np.random.RandomState(342)
w = rng.rand(nout, nhid)       # tied encode/decode weights
a = rng.randn(nhid) * 0.0      # hidden bias, starts at zero
b = rng.randn(nout) * 0.0      # output bias, starts at zero
x = (rng.rand(neg, nout)-0.5) * 1.5
dot_time = 0.0
t = time.time()
for step in xrange(niter):
    # Forward pass: encode (timed matmul), then decode with w transposed.
    mark = time.time()
    hid_in = np.dot(x, w)
    dot_time += time.time() - mark
    hid = np.tanh(hid_in + a)
    mark = time.time()
    out_in = np.dot(hid, w.T)
    dot_time += time.time() - mark
    out = np.tanh(out_in + b)
    # Squared-error backprop through the output tanh.
    g_out = out - x
    err = 0.5 * np.sum(g_out**2)
    g_hidwt = g_out * (1.0 - out**2)
    b -= lr * np.sum(g_hidwt, axis=0)
    # Backprop into the hidden layer (timed matmul).
    mark = time.time()
    g_hid = np.dot(g_hidwt, w)
    dot_time += time.time() - mark
    g_hidin = g_hid * (1.0 - hid**2)
    # Both contributions to the tied weight gradient (timed matmuls).
    mark = time.time()
    gw_decode = np.dot(g_hidwt.T, hid)
    gw_encode = np.dot(x.T, g_hidin)
    dot_time += time.time() - mark
    w -= lr * (gw_decode + gw_encode)
    a -= lr * np.sum(g_hidin, axis=0)
total_time = time.time() - t
print('time: ',total_time, 'err: ', err)
print(' of which', dot_time, 'was spent on dot. Fraction:', dot_time / total_time)
#!/bin/bash
# Run every conv2d benchmark implementation with the same arguments.
# "$@" is quoted so arguments containing spaces survive word splitting
# (unquoted $@ was a bug).
python opencv.py "$@"
python conv2d.py "$@"
python scipy_conv.py "$@"
echo "WARNING the mode is valid for theano and scipy, but opencv use the mode same! Can opencv do the mode full?"
from __future__ import absolute_import, print_function, division
import sys, timeit, time
import numpy as np
import theano, theano.tensor.signal.conv
# Benchmark theano.tensor.signal.conv.conv2d, with and without compile-time
# shape information.
# Usage: <img rows> <img cols> <ker rows> <ker cols> <dtype> [nb_call]
try:
    img_shape = int(sys.argv[1]), int(sys.argv[2])
    ker_shape = int(sys.argv[3]), int(sys.argv[4])
    dtype = sys.argv[5]
except (IndexError, ValueError):
    # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit and real
    # bugs are not turned into a usage message.
    print("Usage: %s <img rows> <img cols> <ker rows> <ker cols> <dtype> [nb_call]" % sys.argv[0], file=sys.stderr)
    sys.exit(-1)
nb_call = 1
if len(sys.argv)>6:
    nb_call=int(sys.argv[6])
# Setup executed inside the timeit namespace; it re-reads sys.argv itself.
setup="""
import sys, timeit, time
import numpy as np
import theano, theano.tensor.signal.conv
img_shape = int(sys.argv[1]), int(sys.argv[2])
ker_shape = int(sys.argv[3]), int(sys.argv[4])
dtype = sys.argv[5]
img = theano.shared(np.ones(img_shape, dtype=dtype))
ker = theano.shared(np.ones(ker_shape, dtype=dtype))
out = theano.shared(np.ones((2,2,2), dtype=dtype))
"""
# Variant 1: no static shape info available to the compiler.
T = timeit.Timer("f()",
    setup+"f = theano.function([], theano.tensor.signal.conv.conv2d(img, ker))")
time_without_shape = T.repeat(repeat=3, number=nb_call)
print(min(time_without_shape), 'theano without shape')
# Variant 2: image/filter shapes given at compile time; result kept in a
# shared variable so transfer costs are excluded.
T = timeit.Timer("f()", setup+"""f = theano.function([], [],
updates={out:theano.tensor.signal.conv.conv2d(img,
ker,image_shape=img_shape,filter_shape=ker_shape)})""")
time_with_shape = T.repeat(repeat=3, number=nb_call)
print(min(time_with_shape), 'theano with shape')
from __future__ import absolute_import, print_function, division
import sys, timeit
import numpy as np
import scikits.image.opencv
# Benchmark OpenCV's cvFilter2D via scikits.image.
# Usage: <img rows> <img cols> <ker rows> <ker cols> <dtype> [nb_call]
try:
    img_shape = int(sys.argv[1]), int(sys.argv[2])
    ker_shape = int(sys.argv[3]), int(sys.argv[4])
    dtype = sys.argv[5]
except (IndexError, ValueError):
    # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit and real
    # bugs are not turned into a usage message.
    print("Usage: %s <img rows> <img cols> <ker rows> <ker cols> <dtype> [nb_call]" % sys.argv[0], file=sys.stderr)
    sys.exit(-1)
nb_call = 1
if len(sys.argv)>6:
    nb_call=int(sys.argv[6])
# Setup executed inside the timeit namespace; it re-reads sys.argv itself.
T = timeit.Timer("f()","""
import scikits.image.opencv, sys, numpy as np
img_shape = int(sys.argv[1]), int(sys.argv[2])
ker_shape = int(sys.argv[3]), int(sys.argv[4])
dtype = sys.argv[5]
img = np.ones(img_shape, dtype=dtype)
ker = np.ones(ker_shape, dtype=dtype)
def f():
    scikits.image.opencv.cvFilter2D(img, ker)
""")
time = T.repeat(repeat=3, number=nb_call)
print(min(time), "opencv")
from __future__ import absolute_import, print_function, division
import sys, timeit
# Benchmark scipy.signal.convolve2d in "valid" mode.
# Usage: <img rows> <img cols> <ker rows> <ker cols> <dtype> [nb_call]
try:
    img_shape = int(sys.argv[1]), int(sys.argv[2])
    ker_shape = int(sys.argv[3]), int(sys.argv[4])
    dtype = sys.argv[5]
except (IndexError, ValueError):
    # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit and real
    # bugs are not turned into a usage message.
    print("Usage: %s <img rows> <img cols> <ker rows> <ker cols> <dtype> [nb_call]" % sys.argv[0], file=sys.stderr)
    sys.exit(-1)
nb_call = 1
if len(sys.argv)>6:
    nb_call=int(sys.argv[6])
# Setup executed inside the timeit namespace; it re-reads sys.argv itself.
T = timeit.Timer("f()","""
from scipy.signal import convolve2d
import numpy
img_shape = int(sys.argv[1]), int(sys.argv[2])
ker_shape = int(sys.argv[3]), int(sys.argv[4])
dtype = sys.argv[5]
img = numpy.ones(img_shape, dtype=dtype)
ker = numpy.ones(ker_shape, dtype=dtype)
def f():
    convolve2d(img, ker, mode="valid")
""")
time = T.repeat(repeat=3, number=nb_call)
print(min(time), "scipy")
from __future__ import absolute_import, print_function, division
import numpy as np
import numexpr as ne
import timeit
import theano
import theano.tensor as T
try:
import pylab
import matplotlib.pyplot as pyplot
except ImportError:
pass
def timeit_2vector_theano(init, nb_element=1e6, nb_repeat=3, nb_call=int(1e2), expr="a**2 + b**2 + 2*a*b"):
    """Time `expr` compiled as a Theano function of two double vectors.

    `init` is a statement string that creates numpy arrays `a` and `b`; the
    setup keeps them as `av`/`bv` and compiles `expr` over symbolic dvectors,
    so only the compiled call is timed.  Returns the raw repeat timings as a
    numpy array of length `nb_repeat`.
    """
    timer = timeit.Timer("tf(av,bv)",
"""
import theano
import theano.tensor as T
import numexpr as ne
from theano.tensor import exp
%(init)s
av=a
bv=b
a=T.dvector()
b=T.dvector()
tf= theano.function([a,b],%(expr)s)
"""%locals()
            )
    samples = timer.repeat(nb_repeat, nb_call)
    return np.asarray(samples)
def timeit_2vector(nb_element=1e6, nb_repeat=3, nb_call=int(1e2), expr="a**2 + b**2 + 2*a*b", do_unalign=False, do_amd=True):
    """Returns a dictionary whose keys are implementations ('numpy', 'numexpr', 'theano', etc.)
    and whose values are numpy arrays of times taken to evaluate the given problem.

    `expr` is an expression string over two vectors `a` and `b` of
    `nb_element` elements; with `do_unalign` the arrays are made unaligned,
    with `do_amd` a Theano+amdlibm run is added under key 'theano_amd'.
    NOTE: the `%(...)s % locals()` templating below ties the setup strings to
    the exact local variable names, so do not rename them.
    """
    rval = dict()
    print()
    print("timeit_2vector(nb_element=%(nb_element)s,nb_repeat=%(nb_repeat)s,nb_call=%(nb_call)s, expr=%(expr)s, do_unalign=%(do_unalign)s)"%locals())
    if do_unalign:
        # Record arrays with a 1-byte field force the f8 column off alignment.
        init = "import numpy as np; a = np.empty(%(nb_element)s, dtype='b1,f8')['f1'];b = np.empty(%(nb_element)s, dtype='b1,f8')['f1'];a[:] = np.arange(len(a));b[:] = np.arange(len(b));"%locals()
    else:
        init = "import numpy as np; a = np.arange(%(nb_element)s);b = np.arange(%(nb_element)s)"%locals()
    # Plain NumPy evaluation of the expression.
    t1 = timeit.Timer("%(expr)s"%locals(),"from numpy import exp; %(init)s"%locals())
    numpy_times = np.asarray(t1.repeat(nb_repeat,nb_call))
    print("NumPy time: each time=",numpy_times, "min_time=", numpy_times.min())
    rval['numpy'] = numpy_times
    # numexpr evaluation of the same expression string.
    t2 = timeit.Timer("""ne.evaluate("%(expr)s")"""%locals(),
        "import numexpr as ne; %(init)s"%locals())
    numexpr_times=np.asarray(t2.repeat(nb_repeat,nb_call))
    rval['numexpr'] = numexpr_times
    print("Numexpr time: each time=",numexpr_times,'min_time=', numexpr_times.min())
    # Theano without amdlibm.
    theano.config.lib.amdlibm = False
    theano_times = timeit_2vector_theano(init, nb_element,nb_repeat,nb_call,expr)
    print("Theano time: each time=",theano_times, 'min_time=',theano_times.min())
    rval['theano'] = theano_times
    if do_amd:
        # Theano again, with the amdlibm math library enabled.
        theano.config.lib.amdlibm = True
        theanoamd_times = timeit_2vector_theano(init, nb_element,nb_repeat,nb_call,expr)
        print("Theano+amdlibm time",theanoamd_times, theanoamd_times.min())
        rval['theano_amd'] = theanoamd_times
    # Summary ratios, based on the best time of each implementation.
    print("time(NumPy) / time(numexpr) = ",numpy_times.min()/numexpr_times.min())
    print("time(NumPy) / time(Theano)",numpy_times.min()/theano_times.min())
    print("time(numexpr) / time(Theano)",numexpr_times.min()/theano_times.min())
    if do_amd:
        print("time(NumPy) / time(Theano+amdlibm)",numpy_times.min()/theanoamd_times.min())
        print("time(numexpr) / time(Theano+amdlibm)",numexpr_times.min()/theanoamd_times.min())
    return rval
def exec_timeit_2vector(expr, nb_call_scal=1, fname=None, do_unalign=False, do_amd=True):
    """Benchmark one expression over several vector sizes and, when pylab is
    available, plot each implementation's speedup relative to NumPy.

    `fname` saves the figure instead of showing it; `do_unalign` adds runs on
    unaligned arrays; `do_amd` adds Theano+amdlibm runs.
    """
    # Problem sizes: (number of vector elements, number of calls per repeat).
    # The historical full sweep was:
    # exp = [(1,100000),(1e1,100000),(1e2,100000),(1e3,100000),(5e3,50000),
    #        (1e4,10000),(5e4,5000),(1e5,2000),(1e6,200),(1e7,10)]
    exp = [(1e3,100000),(5e3,50000)]
    runtimes=[]
    for nb_e, nb_c in exp:
        runtimes.append(timeit_2vector(nb_element=nb_e, nb_repeat=3, nb_call=nb_c*nb_call_scal, expr=expr, do_amd=do_amd))
    runtimes_unalign=[]
    if do_unalign:
        for nb_e, nb_c in exp:
            runtimes_unalign.append(timeit_2vector(nb_element=nb_e, nb_repeat=3, nb_call=nb_c*nb_call_scal, expr=expr, do_unalign=True, do_amd=do_amd))
    print('Runtimes list = ', runtimes)
    numexpr_speedup = np.asarray([t['numpy'].min()/t['numexpr'].min() for t in runtimes],"float32")
    print("time(NumPy) / time(numexpr)", end=' ')
    print(numexpr_speedup, numexpr_speedup.min(), numexpr_speedup.max())
    theano_speedup = np.asarray([t['numpy'].min()/t['theano'].min() for t in runtimes],"float32")
    print("time(NumPy) / time(Theano)", end=' ')
    print(theano_speedup, theano_speedup.min(), theano_speedup.max())
    theano_numexpr_speedup = np.asarray([t['numexpr'].min()/t['theano'].min() for t in runtimes],"float32")
    print("time(numexpr) / time(Theano)", end=' ')
    print(theano_numexpr_speedup, theano_numexpr_speedup.min(), theano_numexpr_speedup.max())
    if do_amd:
        theano_speedup2 = np.asarray([t['numpy'].min()/t['theano_amd'].min() for t in runtimes],"float32")
        print("time(NumPy) / time(theano+amdlibm)", end=' ')
        # BUGFIX: the original printed the non-amd arrays here.
        print(theano_speedup2, theano_speedup2.min(), theano_speedup2.max())
        theano_numexpr_speedup2 = np.asarray([t['numexpr'].min()/t['theano_amd'].min() for t in runtimes],"float32")
        print("time(numexpr) / time(theano+amdlibm)", end=' ')
        # BUGFIX: likewise printed the non-amd arrays.
        print(theano_numexpr_speedup2, theano_numexpr_speedup2.min(), theano_numexpr_speedup2.max())
    if 'pylab' not in globals():
        return
    nb_calls=[e[0] for e in exp]
    # BUGFIX: the original plotting loops referenced undefined names
    # `time`/`time_unalign` and indexed the per-run dicts with integers
    # (NameError/KeyError).  Plot one speedup-vs-NumPy curve per non-numpy
    # implementation instead, in the order the legends below expect.
    impls = [k for k in ('numexpr', 'theano', 'theano_amd') if k in runtimes[0]]
    for impl in impls:
        speedup = np.asarray([t['numpy'].min()/t[impl].min() for t in runtimes],"float32")
        pylab.semilogx(nb_calls, speedup, linewidth=1.0)
    if do_unalign:
        for impl in impls:
            speedup = np.asarray([t['numpy'].min()/t[impl].min() for t in runtimes_unalign],"float32")
            pylab.semilogx(nb_calls, speedup, linewidth=1.0)
    pylab.axhline(y=1, linewidth=1.0, color='black')
    pylab.xlabel('Dimension of real valued vectors a and b')
    pylab.ylabel('Speed up vs NumPy')
    if do_unalign and do_amd:
        pylab.legend(("Numexpr","Theano","Theano(amdlibm)", "Numexpr(unalign)",
                      "Theano(unalign)","Theano(amdlibm,unalign)"),loc='upper left')
    elif do_unalign and not do_amd:
        pylab.legend(("Numexpr","Theano","Numexpr(unalign)",
                      "Theano(unalign)",),loc='upper left')
    elif not do_unalign and do_amd:
        pylab.legend(("Numexpr","Theano","Theano(amdlibm)"),loc='upper left')
    else:
        pylab.legend(("Numexpr","Theano"),loc='upper left')
    pylab.grid(True)
    if fname:
        pylab.savefig(fname)
        pylab.clf()
    else:
        pylab.show()
def execs_timeit_2vector(exprs, fname=None):
    """
    exprs is a list of list of expr to evaluate
    The first level of list is put into different graph section in the same graph.
    The second level is the expression to put in each section

    Each expression may be a plain string or a (string, scale) tuple, where
    the scale multiplies the number of timing calls.  With pylab available,
    draws one subplot per expression (2x2 grid assumed) of numexpr and Theano
    speedups relative to NumPy; `fname` saves the figure instead of showing.
    NOTE(review): original indentation was lost in extraction; nesting below
    is reconstructed from the syntax.
    """
    # Problem sizes: (number of vector elements, number of calls per repeat).
    #exp = [(1,100000),(1e1,100000),(1e2,100000),(1e3,100000), (5e3,50000),
    exp = [(1e3,100000),(5e3,50000), \
          (1e4,10000),(5e4,5000),(1e5,2000),(1e6,200),(1e7,10)
          ]
    ### TO TEST UNCOMMENT THIS LINE
    # exp = [(1,1000),(1e1,1000),(1e2,1000),]
    times=[]      # one list of per-size result dicts per expression
    str_expr=[]   # flat list of the expression strings, same order as times
    for g_exprs in exprs:
        for expr in g_exprs:
            nb_call_scal=1
            if isinstance(expr,tuple):
                # (expression, call-count scale) pair.
                nb_call_scal=expr[1]
                expr = expr[0]
            str_expr.append(expr)
            time=[]
            for nb_e, nb_c in exp:
                time.append(timeit_2vector(nb_element=nb_e, nb_repeat=3, nb_call=nb_c*nb_call_scal, expr=expr, do_amd=False))
            times.append(time)
    if 'pylab' not in globals():
        # Plotting backend unavailable; timings were still printed above.
        return
    nb_calls=[e[0] for e in exp]
    legends=[]
    colors=['b','r','g','c', 'm', 'y']
    assert len(colors)>=len(times)
    fig = pylab.figure()
    for idx,(time,expr) in enumerate(zip(times,str_expr)):
        # One subplot per expression in a 2x2 grid (assumes <= 4 expressions).
        pylab.subplot(220+idx+1)
        pylab.subplots_adjust(wspace=0.25, hspace=0.25)
        #legend=[]
        #plot = fig.add_subplot(1,len(exprs),idx)
        # Speedup of numexpr (red) and Theano (blue) over NumPy.
        speedup = [t["numpy"].min()/t["numexpr"].min() for t in time]
        pylab.semilogx(nb_calls, speedup, linewidth=1.0, color='r')
        speedup = [t["numpy"].min()/t["theano"].min() for t in time]
        pylab.semilogx(nb_calls, speedup, linewidth=1.0, color = 'b')
        pylab.grid(True)
        # Only label the outer edges of the grid.
        if (idx == 2) or (idx == 3):
            pylab.xlabel('Dimension of vectors a and b', fontsize = 15)
        if (idx == 0) or (idx == 2):
            pylab.ylabel('Speed up vs NumPy', fontsize = 15)
        pylab.axhline(y=1, linewidth=1.0, color='black')
        pylab.xlim(1e3,1e7)
        pylab.xticks([1e3,1e5,1e7],['1e3','1e5','1e7'])
        pylab.title(expr)
    if fname:
        fig.savefig(fname)
        pylab.clf()
    else:
        pylab.show()
# Benchmark a 2x2 grid of expressions and save the speedup figure as a PDF.
# The (expr, scale) tuple reduces the call count for the slower power case.
execs_timeit_2vector([
    ["a**2 + b**2 + 2*a*b",
     "2*a + 3*b",
     "a+1",],
    [("2*a + b**10",.2)]
    #"2*a + b*b*b*b*b*b*b*b*b*b",
    #("2*a + exp(b)",.3),
    ],fname="multiple_graph.pdf"
    )
###
### This case is the one given on the numexpr web site (http://code.google.com/p/numexpr/) as of 16 June 2010
### a**2 + b**2 + 2*a*b
#exec_timeit_2vector("a**2 + b**2 + 2*a*b",fname="speedup_numexpr_mulpow2vec.png", do_amd=False)
###
### This case is the one given on the numexpr web site (http://code.google.com/p/numexpr/wiki/Overview) as of 16 June 2010
### 2*a + 3*b
#exec_timeit_2vector("2*a + 3*b",fname="speedup_numexpr_mul2vec.png", do_amd=False)
###
### This case is the one given on the numexpr web site (http://code.google.com/p/numexpr/wiki/Overview) as of 16 June 2010
### 2*a + b**10
#exec_timeit_2vector("2*a + b**10",.2,fname="speedup_numexpr_mulpow2vec_simple.png")
#exec_timeit_2vector("2*a + b*b*b*b*b*b*b*b*b*b",fname="speedup_numexpr_mulpow2vec_simpleV2.png", do_amd=False)
###
### We try to see if the pow optimized speed is available for exp too.
### 2*a + exp(b)
#exec_timeit_2vector("2*a + exp(b)",.3,fname="speedup_numexpr_mulexp2vec.png")
###
### The simplest case where we should show the overhead at its maximum effect
### a+1
#exec_timeit_2vector("a+1",fname="speedup_numexpr_add1vec.png")
#exec_timeit_2vector("a+1",.2,fname="speedup_numexpr_add1vec_unalign.png",do_unalign=True, do_amd=False)
#exec_timeit_2vector("2*a + b**10",.1,fname="speedup_numexpr_mulpow2vec_simple_unalign.png",do_unalign=True)
from __future__ import absolute_import, print_function, division
import theano
import numpy as np
from theano import tensor as T
from theano.tensor import nnet as NN
from six.moves import xrange
from theano.compile import module as M
class RegressionLayer(M.Module):
    """One-layer regression model on the old theano.compile.module API.

    Subclasses must provide build_prediction() and
    build_classification_cost(target); build_regularization() may be
    overridden (the default adds no penalty).
    NOTE(review): original indentation was lost in extraction; nesting below
    is reconstructed from the syntax.
    """
    def __init__(self, input = None, target = None, regularize = True):
        super(RegressionLayer, self).__init__() #boilerplate
        # MODEL CONFIGURATION
        self.regularize = regularize
        # ACQUIRE/MAKE INPUT AND TARGET
        if not input:
            input = T.matrix('input')
        if not target:
            target = T.matrix('target')
        # HYPER-PARAMETERS
        self.stepsize = T.scalar()  # a stepsize for gradient descent
        # PARAMETERS
        self.w = T.matrix()  #the linear transform to apply to our input points
        self.b = T.vector()  #a vector of biases, which make our transform affine instead of linear
        # REGRESSION MODEL
        self.activation = T.dot(input, self.w) + self.b
        self.prediction = self.build_prediction()  # supplied by subclass
        # CLASSIFICATION COST
        self.classification_cost = self.build_classification_cost(target)  # supplied by subclass
        # REGULARIZATION COST
        self.regularization = self.build_regularization()
        # TOTAL COST
        self.cost = self.classification_cost
        if self.regularize:
            self.cost = self.cost + self.regularization
        # GET THE GRADIENTS NECESSARY TO FIT OUR PARAMETERS
        self.grad_w, self.grad_b, grad_act = T.grad(self.cost, [self.w, self.b, self.prediction])
        print('grads', self.grad_w, self.grad_b)
        # INTERFACE METHODS
        # update(input, target) does one gradient-descent step and returns
        # the cost and gradients; apply(input) returns the prediction.
        self.update = M.Method([input, target],
                               [self.cost, self.grad_w, self.grad_b, grad_act],
                               updates={self.w: self.w - self.stepsize * self.grad_w,
                                        self.b: self.b - self.stepsize * self.grad_b})
        self.apply = M.Method(input, self.prediction)
    def params(self):
        # The trainable parameters of the model.
        return self.w, self.b
    def _instance_initialize(self, obj, input_size = None, target_size = None,
                             seed = 1827, **init):
        # obj is an "instance" of this module holding values for each member and
        # functions for each method
        if input_size and target_size:
            # initialize w and b in a special way using input_size and target_size
            sz = (input_size, target_size)
            rng = np.random.RandomState(seed)
            obj.w = rng.uniform(size = sz, low = -0.5, high = 0.5)
            obj.b = np.zeros(target_size)
            obj.stepsize = 0.01
        # here we call the default_initialize method, which takes all the name: value
        # pairs in init and sets the property with that name to the provided value
        # this covers setting stepsize, l2_coef; w and b can be set that way too
        # we call it after as we want the parameter to supersede the default value.
        M.default_initialize(obj,**init)
    def build_regularization(self):
        # Default: no regularization penalty.
        return T.zero() # no regularization!
class SpecifiedRegressionLayer(RegressionLayer):
    """Regression layer with a sigmoid output and summed squared-error cost."""
    def build_prediction(self):
        # NN.softmax(self.activation) could be used here instead; it exposes a
        # slow subtensor implementation.
        return NN.sigmoid(self.activation)
    def build_classification_cost(self, target):
        # Element-wise squared error, summed per example, then over examples.
        residual = target - self.prediction
        self.classification_cost_matrix = residual**2
        per_example = T.sum(self.classification_cost_matrix, axis=1)
        self.classification_costs = per_example
        return T.sum(per_example)
    def build_regularization(self):
        # L2 penalty on the weights, scaled by a free hyper-parameter.
        self.l2_coef = T.scalar()
        weight_norm = T.sum(self.w * self.w)
        return self.l2_coef * weight_norm
class PrintEverythingMode(theano.Mode):
    """Compilation mode that traces execution: for every node it prints the
    node, its input values, then (after running it) its output values."""
    def __init__(self, linker, optimizer=None):
        def trace(i, node, fn):
            # Show node and inputs, execute the thunk, then show outputs.
            print(i, node, [input[0] for input in fn.inputs], end=' ')
            fn()
            print([output[0] for output in fn.outputs])
        traced_linker = theano.gof.WrapLinkerMany([linker], [trace])
        super(PrintEverythingMode, self).__init__(traced_linker, optimizer)
def test_module_advanced_example():
    """Train SpecifiedRegressionLayer on a tiny random problem, tracing every
    executed node via PrintEverythingMode, and print a profiling summary."""
    profmode = PrintEverythingMode(theano.gof.OpWiseCLinker(), 'fast_run')
    # 4 examples of 10 features; binary targets from the sign of a randn draw.
    data_x = np.random.randn(4, 10)
    data_y = [ [int(x)] for x in (np.random.randn(4) > 0)]
    model = SpecifiedRegressionLayer(regularize = False).make(input_size = 10,
                                                              target_size = 1,
                                                              stepsize = 0.1,
                                                              mode=profmode)
    for i in xrange(1000):
        xe, gw, gb, ga = model.update(data_x, data_y)
        if i % 100 == 0:
            # Periodic progress: iteration and current cost.
            print(i, xe)
            pass
    #for inputs, targets in my_training_set():
    #print "cost:", model.update(inputs, targets)
    print("final weights:", model.w)
    print("final biases:", model.b)
    profmode.print_summary()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论