Commit fba3e888, authored by Frédéric Bastien, committed by GitHub

Merge pull request #5609 from nouiz/cleanup

Remove the old benchmark directory.
The code written by James Bergstra in this directory has been superseded
by the scipy benchmarking code, which is in another repository:
https://github.com/jaberg/DeepLearningBenchmarks
The remainder of the code in this directory remains undocumented.
# Build the GSL-based autoencoder benchmark binary from aa.cc.
# PUB_PREFIX must point at the prefix where GSL is installed;
# THEANO_BLAS_LDFLAGS supplies the BLAS link line.
aa.x : aa.cc
	g++ -O3 -ffast-math aa.cc -o aa.x -L${PUB_PREFIX}/lib -lgsl ${THEANO_BLAS_LDFLAGS}

# 'clean' is not a file: mark it phony, and use -f so a missing
# binary does not make the target fail.
.PHONY : clean
clean :
	rm -f aa.x
/*
*
* g++ -O2 -ffast-math -I$PUB_PREFIX/include aa.cc -o aa.x -lgsl -lgslcblas
*
* g++ -O2 -ffast-math -I$PUB_PREFIX/include aa.cc -o aa.x -L$PUB_PREFIX/lib -lgsl -lcblas -lgoto -lgfortran
*
* ./aa.x 10 5 7 1000
*
* */
#include <cassert>
#include <cstdlib>
#include <cstdio>
#include <cmath>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_blas.h>
#include <time.h>
#include <sys/time.h>
/* Convert a struct timeval to a floating-point number of seconds
 * (seconds plus microseconds / 1e6), mirroring Python's time.time(). */
double pytime(const struct timeval * tv)
{
    const double usec_per_sec = 1000000.0;
    double seconds = (double) tv->tv_sec;
    seconds += (double) tv->tv_usec / usec_per_sec;
    return seconds;
}
/*
 * Train a tied-weight autoencoder (nin == nout visible units, nhid hidden
 * units, tanh activations) by plain gradient descent, using GSL BLAS for
 * the matrix products, and report how much of the wall time was spent
 * inside gsl_blas_dgemm.
 *
 * argv: <neg> <nout> <nhid> <niter>
 *   neg   - number of training examples (rows of x)
 *   nout  - number of visible units (nin is tied to nout)
 *   nhid  - number of hidden units
 *   niter - number of training iterations
 */
int main(int argc, char **argv)
{
assert(argc == 5);
int neg = strtol(argv[1], 0, 0);
int nout = strtol(argv[2], 0, 0);
int nin = nout;  /* autoencoder: input and reconstruction have the same size */
int nhid = strtol(argv[3], 0, 0);
int niter = strtol(argv[4], 0, 0);
double lr = 0.01;  /* learning rate for all parameters */
/* Fixed seed so runs are comparable with the Python implementations. */
gsl_rng * rng = gsl_rng_alloc (gsl_rng_taus);
gsl_rng_set(rng, 234);
/* Parameters and per-iteration work buffers (freed at exit, see below). */
gsl_matrix * x = gsl_matrix_alloc(neg, nin);        /* input == target */
gsl_matrix * w = gsl_matrix_alloc(nin, nhid);       /* tied weight matrix */
gsl_vector * a = gsl_vector_alloc(nhid);            /* hidden bias */
gsl_vector * b = gsl_vector_alloc(nout);            /* output bias */
gsl_matrix * xw = gsl_matrix_alloc(neg, nhid);      /* x.w pre-activation */
gsl_matrix * hid = gsl_matrix_alloc(neg, nhid);     /* tanh hidden layer */
gsl_matrix * hidwt = gsl_matrix_alloc(neg, nout);   /* hid.w^T pre-activation */
gsl_matrix * g_hidwt = gsl_matrix_alloc(neg, nout); /* gradient wrt hidwt */
gsl_matrix * g_hid = gsl_matrix_alloc(neg, nhid);   /* gradient wrt hid */
gsl_matrix * g_w = gsl_matrix_alloc(nout, nhid);    /* decoder part of grad w */
gsl_vector * g_b = gsl_vector_alloc(nout);          /* gradient wrt b */
/* Inputs uniform in [-0.75, 0.75); weights uniform in [0, 1); biases zero.
 * (neg*nout == neg*nin and nout*nhid == nin*nhid since nin == nout.) */
for (int i = 0; i < neg*nout; ++i) x->data[i] = (gsl_rng_uniform(rng) -0.5)*1.5;
for (int i = 0; i < nout*nhid; ++i) w->data[i] = gsl_rng_uniform(rng);
for (int i = 0; i < nhid; ++i) a->data[i] = 0.0;
for (int i = 0; i < nout; ++i) b->data[i] = 0.0;
//
//
//
//
struct timeval tv0, tv1;      /* whole-run timer */
struct timeval tdot0, tdot1;  /* per-dgemm timer */
double time_of_dot = 0.0;
gettimeofday(&tv0, 0);
double err = 0.0;
for (int iter = 0; iter < niter; ++iter)
{
/* Forward: hid = tanh(x.w + a). */
gettimeofday(&tdot0, 0);
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, x, w, 0.0, xw);
gettimeofday(&tdot1, 0);
time_of_dot += pytime(&tdot1) - pytime(&tdot0);
for (int i = 0; i < neg; ++i)
for (int j = 0; j < nhid; ++j)
{
double act = xw->data[i*nhid+j] + a->data[j];
hid->data[i*nhid+j] = tanh(act);
}
/* Forward: out = tanh(hid.w^T + b) reconstructs x (weights are tied). */
gettimeofday(&tdot0, 0);
gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, hid, w, 0.0, hidwt);
gettimeofday(&tdot1, 0);
time_of_dot += pytime(&tdot1) - pytime(&tdot0);
for (int i = 0; i < nout; ++i) g_b->data[i] = 0.0;
err = 0.0;
/* Output layer: accumulate squared error, backprop through tanh
 * (d tanh = 1 - out^2), and sum the bias gradient over examples. */
for (int i = 0; i < neg; ++i)
for (int j = 0; j < nout; ++j)
{
double act = hidwt->data[i*nout+j] + b->data[j];
double out = tanh(act);
double g_out = out - x->data[i*nout+j];
err += g_out * g_out;
g_hidwt->data[i*nout+j] = g_out * (1.0 - out*out);
g_b->data[j] += g_hidwt->data[i*nout+j];
}
for (int i = 0; i < nout; ++i) b->data[i] -= lr * g_b->data[i];
if (1)
{
/* Backprop into the hidden layer. Because w is tied it receives two
 * gradient contributions: g_w from the decoder (g_hidwt^T.hid) and
 * x^T.g_hid from the encoder (applied directly by dgemm below). */
gettimeofday(&tdot0, 0);
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, g_hidwt, w, 0.0, g_hid);
gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, g_hidwt, hid, 0.0, g_w);
gettimeofday(&tdot1, 0);
time_of_dot += pytime(&tdot1) - pytime(&tdot0);
for (int i = 0; i < neg; ++i)
for (int j = 0; j < nhid; ++j)
{
/* Chain rule through the hidden tanh, then SGD step on the hidden bias. */
g_hid->data[i*nhid+j] *= (1.0 - hid->data[i*nhid+j] * hid->data[i*nhid+j]);
a->data[j] -= lr * g_hid->data[i*nhid+j];
}
/* Encoder contribution folded into the dgemm: w += -lr * x^T.g_hid. */
gettimeofday(&tdot0, 0);
gsl_blas_dgemm(CblasTrans, CblasNoTrans, -lr, x, g_hid, 1.0, w);
gettimeofday(&tdot1, 0);
time_of_dot += pytime(&tdot1) - pytime(&tdot0);
/* Decoder contribution (g_w has the same number of elements as w). */
for (int i = 0; i < nout*nhid; ++i) w->data[i] -= lr * g_w->data[i];
}
}
gettimeofday(&tv1, 0);
double total_time = pytime(&tv1) - pytime(&tv0);
fprintf(stdout, "took = %lfs to get err %lf\n", total_time, 0.5 * err);
fprintf(stdout, "... of which %.2lfs was spent in dgemm (fraction: %.2lf)\n", time_of_dot, time_of_dot / total_time);
//skip freeing
return 0;
}
#!/usr/bin/env python
from __future__ import absolute_import, print_function, division
import numpy as np
import sys
import time
import theano
import theano.tensor as T
import theano.sandbox
from six.moves import xrange
from theano.compile import module, Mode
from theano import gof, Op, Apply
from theano.tensor import blas, opt
# numpy: aa_numpy.py
# c : aa.cc
# NOTE(review): dead code -- the `if 0:` guard means class Opt is never
# defined. It is a hand-rolled optimizer pipeline from before Theano grew
# built-in gemm/canonicalization passes, and it references long-removed
# APIs (T.sub_inplace, T.transpose_inplace, theano.gof.TopoOptimizer, ...).
# Indentation below is reconstructed from a whitespace-mangled scrape --
# verify against the original file before reviving any of it.
if 0:
    class Opt(object):
        # Merge duplicated subgraphs into a single node.
        merge = theano.gof.MergeOptimizer()
        # Rewrite a basic dot/add/scale pattern into a single gemm.
        gemm_opt_1 = theano.gof.TopoOptimizer(theano.tensor_opt.gemm_pattern_1)
        gemm_opt_2 = theano.gof.TopoOptimizer( # d -= a * (dot()+transpose(dot))
            theano.gof.PatternSub(
                (
                    T.sub_inplace,
                    'd',
                    (
                        T.mul,
                        dict(pattern = (T.DimShuffle((), ['x', 'x'], inplace = True), 'a'),
                             allow_multiple_clients = True),
                        (
                            T.add,
                            (T.dot, 'b', 'c'),
                            (T.transpose_inplace, (T.dot, 'f', 'g'))
                        )
                    )
                ),
                # Replacement: two nested gemms implementing the same update.
                (
                    T.gemm,
                    (
                        T.gemm,
                        'd',
                        (T.neg, 'a'),
                        (T.transpose_inplace, 'g'),
                        (T.transpose_inplace, 'f'),
                        T.constant(1.0)
                    ),
                    (T.neg, 'a'),
                    'b',
                    'c',
                    T.constant(1.0)
                ),
                allow_multiple_clients = False))
        # Strength reduction: rewrite x*x and x**2 as sqr(x).
        sqr = []
        sqr.append( theano.gof.TopoOptimizer(
            theano.gof.PatternSub(
                (T.mul,'x', 'x'),
                (T.sqr, 'x'), allow_multiple_clients=True)))
        sqr.append(theano.gof.TopoOptimizer(
            theano.gof.PatternSub(
                (T.pow, 'x', (T.DimShuffle((), ['x', 'x'], inplace=True), T.constant(2))),
                (T.sqr, 'x'), allow_multiple_clients=True)))
        # Identity eliminations: explicit copies, double transposes,
        # sqrt/sqr inverses, and mul/div cancellations.
        ident_opt_list = []
        ident_opt_list.append( # remove explicit copies
            theano.gof.TopoOptimizer(
                theano.gof.PatternSub(
                    (T.tensor_copy, 'x'),
                    'x',
                    allow_multiple_clients=True)))
        ident_opt_list.append( # remove double-transpose
            theano.gof.TopoOptimizer(
                theano.gof.PatternSub(
                    (T.transpose_inplace, (T.transpose_inplace, 'x')),
                    'x',
                    allow_multiple_clients=True)))
        ident_opt_list.append(  # sqr(sqrt(x)) -> x
            theano.gof.TopoOptimizer(
                theano.gof.PatternSub(
                    (T.sqr, (T.sqrt,'x')),
                    'x',
                    allow_multiple_clients=True)))
        ident_opt_list.append(  # sqrt(sqr(x)) -> x
            theano.gof.TopoOptimizer(
                theano.gof.PatternSub(
                    (T.sqrt, (T.sqr,'x')),
                    'x',
                    allow_multiple_clients=True)))
        ident_opt_list.append(  # x * (y / x) -> y
            theano.gof.TopoOptimizer(
                theano.gof.PatternSub(
                    (T.mul, 'x', (T.div,'y', 'x')),
                    'y',
                    allow_multiple_clients=True)))
        ident_opt_list.append(  # (y / x) * x -> y
            theano.gof.TopoOptimizer(
                theano.gof.PatternSub(
                    (T.mul, (T.div,'y', 'x'), 'x'),
                    'y',
                    allow_multiple_clients=True)))
        ident_opt_list.append(  # (y * x) / x -> y
            theano.gof.TopoOptimizer(
                theano.gof.PatternSub(
                    (T.div, (T.mul,'y', 'x'), 'x'),
                    'y',
                    allow_multiple_clients=True)))
        ident_opt_list.append(  # (y * x) / y -> x
            theano.gof.TopoOptimizer(
                theano.gof.PatternSub(
                    (T.div, (T.mul,'y', 'x'), 'y'),
                    'x',
                    allow_multiple_clients=True)))
        def __call__(self, env):
            # Apply the whole pipeline to `env` (a function graph):
            # merge, identities, sqr rewrites, gemm rewrites, merge again.
            self.merge(env)
            #eliminate identities
            if 0:
                print('SKIPPING optimizations')
            else:
                for opt in self.ident_opt_list:
                    opt(env)
                for opt in self.sqr:
                    opt(env)
                self.gemm_opt_1(env)
                self.gemm_opt_2(env)
            self.merge(env)
def print_graph_linker(print_prog=True):
    """Return a linker that prints each node as it executes.

    Wraps an OpWiseCLinker so that, when `print_prog` is true, every
    executed node is printed together with the indices of the nodes that
    produced its inputs ('-' for graph inputs).

    NOTE(review): indentation reconstructed from a whitespace-mangled
    scrape; the `else:` branch and the debug `if False` block in
    particular should be checked against the original file.
    """
    if 1:
        # Map from owner node to its execution index, for pretty-printing
        # where each input came from.
        imap = {None:'-'}
        def blah(i, node, thunk):
            # Callback invoked by the wrap-linker around each thunk.
            imap[node] = str(i)
            if print_prog:# and node.op.__class__ is T.DimShuffle:
                # Disabled DimShuffle-specific debug output.
                if False and node.op == T.DimShuffle((), ['x', 'x'], inplace = True):
                    print(node.op == T.DimShuffle((), ['x', 'x'],
                          inplace=True), end=' ')
                    print(node.inputs[0], type(node.inputs[0]), end=' ')
                    print(node.inputs[0].equals(T.constant(2)), end=' ')
                outputs = node.outputs
                inputs = theano.gof.graph.inputs(outputs)
                print('node ', i, node, end=' ')
                print(':'.join([imap[inp.owner] for inp in node.inputs]))
                #print theano.sandbox.pprint.pp.process_graph(inputs, outputs)
        return theano.sandbox.wraplinker.WrapLinkerMany(
            [theano.gof.OpWiseCLinker()],
            [theano.sandbox.wraplinker.run_all
            ,blah
            #,theano.sandbox.wraplinker.numpy_notall_isfinite
            ])
    else:
        # Plain linker, no tracing.
        return theano.gof.OpWiseCLinker()
class M(module.Module):
    """Tied-weight autoencoder built with the old theano.compile.module API.

    Mirrors the model in aa.cc / aa_numpy.py: one tanh hidden layer, the
    decoder reusing the transposed encoder weights, trained by SGD on half
    the sum of squared reconstruction errors.
    """
    def __init__(self):
        super(M, self).__init__()
        x = T.matrix('x') # input, target
        self.w = module.Member(T.matrix('w')) # weights
        self.a = module.Member(T.vector('a')) # hid bias
        self.b = module.Member(T.vector('b')) # output bias
        # Forward pass: hid = tanh(x.w + a); out = tanh(hid.w^T + b).
        self.hid = T.tanh(T.dot(x, self.w) + self.a)
        hid = self.hid
        self.out = T.tanh(T.dot(hid, self.w.T) + self.b)
        out = self.out
        # Reconstruction cost: half the summed squared error.
        self.err = 0.5 * T.sum((out - x)**2)
        err = self.err
        # step(x): one SGD update (fixed lr 0.01) of w, a, b; returns err.
        params = [self.w, self.a, self.b]
        gparams = T.grad(err, params)
        updates = [(p, p - 0.01 * gp) for p, gp in zip(params, gparams)]
        self.step = module.Method([x], err, updates=dict(updates))
# Driver: build the module, compile it with the chosen mode, then time
# `niter` training steps on random data seeded identically to aa.cc.
mod = M()

# Only the last of the original chain of mode reassignments was ever
# effective; the alternatives are kept for experimentation:
# mode = 'FAST_RUN'
# mode = Mode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker(nice_errors=True))
# mode = Mode(optimizer='fast_run', linker='c')
mode = Mode(optimizer='fast_run', linker='c|py')
print(mod.pretty(mode=mode))
m = mod.make(mode=mode)

# Command line: <neg> <nout> <nhid> <niter> (same as aa.cc / aa_numpy.py).
neg, nout, nhid, niter = [int(a) for a in sys.argv[1:]]
rng = np.random.RandomState(342)
m.w = rng.rand(nout, nhid)
m.a = rng.randn(nhid) * 0.0
m.b = rng.randn(nout) * 0.0
x = (rng.rand(neg, nout) - 0.5) * 1.5

t = time.time()
for i in xrange(niter):
    err = m.step(x)
print('time: ', time.time() - t, 'err: ', err)

# Not every Mode implements print_summary(); skip it quietly instead of
# crashing after the benchmark has already produced its numbers.
try:
    mode.print_summary()
except AttributeError:
    pass
#!/usr/bin/env python
from __future__ import absolute_import, print_function, division
import numpy as np
import sys
import time
from six.moves import xrange
# c: aa.cc
neg, nout, nhid, niter = [int(a) for a in sys.argv[1:]]
lr = 0.01
rng = np.random.RandomState(342)
w = rng.rand(nout, nhid)
a = rng.randn(nhid) * 0.0
b = rng.randn(nout) * 0.0
x = (rng.rand(neg, nout)-0.5) * 1.5
dot_time = 0.0
t = time.time()
for i in xrange(niter):
tt = time.time()
d = np.dot(x, w)
dot_time += time.time() - tt
hid = np.tanh(d + a)
tt = time.time()
d = np.dot(hid, w.T)
dot_time += time.time() - tt
out = np.tanh(d + b)
g_out = out - x
err = 0.5 * np.sum(g_out**2)
g_hidwt = g_out * (1.0 - out**2)
b -= lr * np.sum(g_hidwt, axis=0)
tt = time.time()
g_hid = np.dot(g_hidwt, w)
dot_time += time.time() - tt
g_hidin = g_hid * (1.0 - hid**2)
tt = time.time()
d = np.dot(g_hidwt.T, hid)
dd = np.dot(x.T, g_hidin)
dot_time += time.time() - tt
gw = (d + dd)
w -= lr * gw
a -= lr * np.sum(g_hidin, axis=0)
total_time = time.time() - t
print('time: ',total_time, 'err: ', err)
print(' of which', dot_time, 'was spent on dot. Fraction:', dot_time / total_time)
#!/bin/bash
# Run all three conv2d benchmark scripts with the same arguments.
# "$@" (quoted) forwards every argument intact even if it contains
# spaces; the original unquoted $@ would have re-split them.
python opencv.py "$@"
python conv2d.py "$@"
python scipy_conv.py "$@"
echo "WARNING the mode is valid for theano and scipy, but opencv use the mode same! Can opencv do the mode full?"
from __future__ import absolute_import, print_function, division
import sys, timeit, time
import numpy as np
import theano, theano.tensor.signal.conv
# Parse <img rows> <img cols> <ker rows> <ker cols> <dtype>; print usage
# and exit on missing or non-integer arguments. The original bare
# `except:` is narrowed so real errors (e.g. KeyboardInterrupt) propagate.
try:
    img_shape = int(sys.argv[1]), int(sys.argv[2])
    ker_shape = int(sys.argv[3]), int(sys.argv[4])
    dtype = sys.argv[5]
except (IndexError, ValueError):
    print("Usage: %s <img rows> <img cols> <ker rows> <ker cols> <dtype> [nb_call]" % sys.argv[0], file=sys.stderr)
    sys.exit(-1)

# Optional 6th argument: number of conv2d calls per timing repetition.
nb_call = 1
if len(sys.argv) > 6:
    nb_call = int(sys.argv[6])

# timeit executes its setup string in a fresh namespace, so it must
# re-import everything and re-parse argv itself.
setup = """
import sys, timeit, time
import numpy as np
import theano, theano.tensor.signal.conv
img_shape = int(sys.argv[1]), int(sys.argv[2])
ker_shape = int(sys.argv[3]), int(sys.argv[4])
dtype = sys.argv[5]
img = theano.shared(np.ones(img_shape, dtype=dtype))
ker = theano.shared(np.ones(ker_shape, dtype=dtype))
out = theano.shared(np.ones((2,2,2), dtype=dtype))
"""

# Time conv2d without compile-time shape information.
T = timeit.Timer("f()",
    setup + "f = theano.function([], theano.tensor.signal.conv.conv2d(img, ker))")
time_without_shape = T.repeat(repeat=3, number=nb_call)
print(min(time_without_shape), 'theano without shape')

# Time conv2d with static image/filter shapes, which lets Theano pick
# specialized code paths.
T = timeit.Timer("f()", setup + """f = theano.function([], [],
    updates={out:theano.tensor.signal.conv.conv2d(img,
    ker,image_shape=img_shape,filter_shape=ker_shape)})""")
time_with_shape = T.repeat(repeat=3, number=nb_call)
print(min(time_with_shape), 'theano with shape')
from __future__ import absolute_import, print_function, division
import sys, timeit
import numpy as np
import scikits.image.opencv
# Parse <img rows> <img cols> <ker rows> <ker cols> <dtype>; print usage
# and exit on missing or non-integer arguments. The original bare
# `except:` is narrowed so real errors propagate.
try:
    img_shape = int(sys.argv[1]), int(sys.argv[2])
    ker_shape = int(sys.argv[3]), int(sys.argv[4])
    dtype = sys.argv[5]
except (IndexError, ValueError):
    print("Usage: %s <img rows> <img cols> <ker rows> <ker cols> <dtype> [nb_call]" % sys.argv[0], file=sys.stderr)
    sys.exit(-1)

# Optional 6th argument: number of cvFilter2D calls per timing repetition.
nb_call = 1
if len(sys.argv) > 6:
    nb_call = int(sys.argv[6])

# The setup string re-imports and re-parses argv because timeit runs it
# in a fresh namespace.
T = timeit.Timer("f()","""
import scikits.image.opencv, sys, numpy as np
img_shape = int(sys.argv[1]), int(sys.argv[2])
ker_shape = int(sys.argv[3]), int(sys.argv[4])
dtype = sys.argv[5]
img = np.ones(img_shape, dtype=dtype)
ker = np.ones(ker_shape, dtype=dtype)
def f():
    scikits.image.opencv.cvFilter2D(img, ker)
""")
time = T.repeat(repeat=3, number=nb_call)
print(min(time), "opencv")
from __future__ import absolute_import, print_function, division
import sys, timeit
# Parse <img rows> <img cols> <ker rows> <ker cols> <dtype>; print usage
# and exit on missing or non-integer arguments. The original bare
# `except:` is narrowed so real errors propagate.
try:
    img_shape = int(sys.argv[1]), int(sys.argv[2])
    ker_shape = int(sys.argv[3]), int(sys.argv[4])
    dtype = sys.argv[5]
except (IndexError, ValueError):
    print("Usage: %s <img rows> <img cols> <ker rows> <ker cols> <dtype> [nb_call]" % sys.argv[0], file=sys.stderr)
    sys.exit(-1)

# Optional 6th argument: number of convolve2d calls per timing repetition.
nb_call = 1
if len(sys.argv) > 6:
    nb_call = int(sys.argv[6])

# The setup string re-imports and re-parses argv because timeit runs it
# in a fresh namespace; mode="valid" matches the Theano benchmark.
T = timeit.Timer("f()","""
from scipy.signal import convolve2d
import numpy
img_shape = int(sys.argv[1]), int(sys.argv[2])
ker_shape = int(sys.argv[3]), int(sys.argv[4])
dtype = sys.argv[5]
img = numpy.ones(img_shape, dtype=dtype)
ker = numpy.ones(ker_shape, dtype=dtype)
def f():
    convolve2d(img, ker, mode="valid")
""")
time = T.repeat(repeat=3, number=nb_call)
print(min(time), "scipy")
(Diff collapsed — file contents not shown in this capture.)
from __future__ import absolute_import, print_function, division
import theano
import numpy as np
from theano import tensor as T
from theano.tensor import nnet as NN
from six.moves import xrange
from theano.compile import module as M
class RegressionLayer(M.Module):
    """Affine regression layer for the old theano.compile.module API.

    Template-method base class: subclasses supply build_prediction() and
    build_classification_cost(), and may override build_regularization().
    Exposes two compiled methods: `update` (one gradient-descent step,
    returning cost and gradients) and `apply` (forward prediction).

    NOTE(review): indentation reconstructed from a whitespace-mangled
    scrape; verify branch/method boundaries against the original file.
    """
    def __init__(self, input = None, target = None, regularize = True):
        super(RegressionLayer, self).__init__() #boilerplate
        # MODEL CONFIGURATION
        self.regularize = regularize
        # ACQUIRE/MAKE INPUT AND TARGET
        if not input:
            input = T.matrix('input')
        if not target:
            target = T.matrix('target')
        # HYPER-PARAMETERS
        self.stepsize = T.scalar() # a stepsize for gradient descent
        # PARAMETERS
        self.w = T.matrix() #the linear transform to apply to our input points
        self.b = T.vector() #a vector of biases, which make our transform affine instead of linear
        # REGRESSION MODEL
        self.activation = T.dot(input, self.w) + self.b
        self.prediction = self.build_prediction()
        # CLASSIFICATION COST
        self.classification_cost = self.build_classification_cost(target)
        # REGULARIZATION COST
        self.regularization = self.build_regularization()
        # TOTAL COST
        self.cost = self.classification_cost
        if self.regularize:
            self.cost = self.cost + self.regularization
        # GET THE GRADIENTS NECESSARY TO FIT OUR PARAMETERS
        self.grad_w, self.grad_b, grad_act = T.grad(self.cost, [self.w, self.b, self.prediction])
        print('grads', self.grad_w, self.grad_b)
        # INTERFACE METHODS
        # update(input, target): one gradient-descent step on w and b,
        # returning the cost and the gradients used for the step.
        self.update = M.Method([input, target],
                               [self.cost, self.grad_w, self.grad_b, grad_act],
                               updates={self.w: self.w - self.stepsize * self.grad_w,
                                        self.b: self.b - self.stepsize * self.grad_b})
        # apply(input): forward pass only.
        self.apply = M.Method(input, self.prediction)
    def params(self):
        # The fittable parameters, in the order update() descends on them.
        return self.w, self.b
    def _instance_initialize(self, obj, input_size = None, target_size = None,
                             seed = 1827, **init):
        # obj is an "instance" of this module holding values for each member and
        # functions for each method
        if input_size and target_size:
            # initialize w and b in a special way using input_size and target_size
            sz = (input_size, target_size)
            rng = np.random.RandomState(seed)
            obj.w = rng.uniform(size = sz, low = -0.5, high = 0.5)
            obj.b = np.zeros(target_size)
            obj.stepsize = 0.01
        # here we call the default_initialize method, which takes all the name: value
        # pairs in init and sets the property with that name to the provided value
        # this covers setting stepsize, l2_coef; w and b can be set that way too
        # we call it after as we want the parameter to supersede the default value.
        M.default_initialize(obj,**init)
    def build_regularization(self):
        # Default: no penalty term; subclasses override to add one.
        return T.zero() # no regularization!
class SpecifiedRegressionLayer(RegressionLayer):
    """Concrete RegressionLayer: sigmoid prediction, summed squared-error
    cost, and optional L2 weight regularization."""

    def build_prediction(self):
        # A softmax here exposes a slow subtensor implementation, so the
        # sigmoid is used instead.
        return NN.sigmoid(self.activation)

    def build_classification_cost(self, target):
        # Per-element squared error, summed per example and then overall.
        residual = target - self.prediction
        self.classification_cost_matrix = residual**2
        self.classification_costs = T.sum(self.classification_cost_matrix, axis=1)
        return T.sum(self.classification_costs)

    def build_regularization(self):
        # L2 weight decay, weighted by a tunable hyper-parameter.
        self.l2_coef = T.scalar()
        return self.l2_coef * T.sum(self.w * self.w)
class PrintEverythingMode(theano.Mode):
    """Theano Mode that traces execution: for each node it prints the
    node index, the node, its input values, and its output values."""
    def __init__(self, linker, optimizer=None):
        def print_eval(i, node, fn):
            # Show inputs before running the thunk, outputs after.
            print(i, node, [input[0] for input in fn.inputs], end=' ')
            fn()
            print([output[0] for output in fn.outputs])
        # Wrap the given linker so print_eval runs around every thunk.
        wrap_linker = theano.gof.WrapLinkerMany([linker], [print_eval])
        super(PrintEverythingMode, self).__init__(wrap_linker, optimizer)
def test_module_advanced_example():
    """Smoke-test SpecifiedRegressionLayer: train on 4 random examples for
    1000 steps under PrintEverythingMode, printing the cost every 100
    steps and the final parameters plus a mode summary at the end."""
    profmode = PrintEverythingMode(theano.gof.OpWiseCLinker(), 'fast_run')
    # 4 examples with 10 features; binary targets derived from random signs.
    data_x = np.random.randn(4, 10)
    data_y = [ [int(x)] for x in (np.random.randn(4) > 0)]
    model = SpecifiedRegressionLayer(regularize = False).make(input_size = 10,
                                                             target_size = 1,
                                                             stepsize = 0.1,
                                                             mode=profmode)
    for i in xrange(1000):
        xe, gw, gb, ga = model.update(data_x, data_y)
        if i % 100 == 0:
            print(i, xe)
        pass
    #for inputs, targets in my_training_set():
    #print "cost:", model.update(inputs, targets)
    print("final weights:", model.w)
    print("final biases:", model.b)
    profmode.print_summary()
Markdown format
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment