提交 7a3a3e51 authored 作者: Olivier Breuleux's avatar Olivier Breuleux

merge

#include <cassert>
#include <cstdlib>
#include <cstdio>
#include <cmath>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_blas.h>
typedef float real;
int main(int argc, char **argv)
{
assert(argc == 4);
int neg = strtol(argv[1], 0, 0);
int nout = strtol(argv[2], 0, 0);
int nhid = strtol(argv[3], 0, 0);
double lr = 0.01;
gsl_rng * rng = gsl_rng_alloc (gsl_rng_taus);
gsl_rng_set(rng, 234);
gsl_matrix * x = gsl_matrix_alloc(neg, nout);
gsl_matrix * w = gsl_matrix_alloc(nout, nhid);
gsl_vector * a = gsl_vector_alloc(nhid);
gsl_vector * b = gsl_vector_alloc(nout);
gsl_matrix * xw = gsl_matrix_alloc(neg, nhid);
gsl_matrix * hid = gsl_matrix_alloc(neg, nhid);
gsl_matrix * hidwt = gsl_matrix_alloc(neg, nout);
gsl_matrix * g_hidwt = gsl_matrix_alloc(neg, nout);
gsl_matrix * g_hid = gsl_matrix_alloc(neg, nhid);
gsl_matrix * g_w = gsl_matrix_alloc(nout, nhid);
gsl_vector * g_b = gsl_vector_alloc(nout);
for (int i = 0; i < neg*nout; ++i) x->data[i] = (gsl_rng_uniform(rng) -0.5)*1.5;
for (int i = 0; i < nout*nhid; ++i) w->data[i] = gsl_rng_uniform(rng);
for (int i = 0; i < nhid; ++i) a->data[i] = 0.0;
for (int i = 0; i < nout; ++i) b->data[i] = 0.0;
//
//
//
//
double err = 0.0;
for (int iter = 0; iter < 1000; ++iter)
{
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, x, w, 0.0, xw);
for (int i = 0; i < neg; ++i)
for (int j = 0; j < nhid; ++j)
{
double act = xw->data[i*nhid+j] + a->data[j];
hid->data[i*nhid+j] = tanh(act);
}
gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, hid, w, 0.0, hidwt);
for (int i = 0; i < nout; ++i) g_b->data[i] = 0.0;
err = 0.0;
for (int i = 0; i < neg; ++i)
for (int j = 0; j < nout; ++j)
{
double act = hidwt->data[i*nout+j] + b->data[j];
double out = tanh(act);
double g_out = out - x->data[i*nout+j];
err += g_out * g_out;
g_hidwt->data[i*nout+j] = g_out * (1.0 - out*out);
g_b->data[j] += g_hidwt->data[i*nout+j];
}
for (int i = 0; i < nout; ++i) b->data[i] -= lr * g_b->data[i];
if (1)
{
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, g_hidwt, w, 0.0, g_hid);
gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, g_hidwt, hid, 0.0, g_w);
for (int i = 0; i < neg; ++i)
for (int j = 0; j < nhid; ++j)
{
g_hid->data[i*nhid+j] *= (1.0 - hid->data[i*nhid+j] * hid->data[i*nhid+j]);
a->data[j] -= lr * g_hid->data[i*nhid+j];
}
gsl_blas_dgemm(CblasTrans, CblasNoTrans, -lr, x, g_hid, 1.0, w);
for (int i = 0; i < nout*nhid; ++i) w->data[i] -= lr * g_w->data[i];
}
}
fprintf(stdout, "%lf\n", 0.5 * err);
//skip freeing
return 0;
}
from __future__ import absolute_import
import numpy
import sys
import time
import theano
import theano.tensor as T
class Opt(object):
merge = theano.gof.MergeOptimizer()
gemm_opt_1 = theano.gof.TopoOptimizer(theano.tensor_opt.gemm_pattern_1)
sqr_opt_0 = theano.gof.TopoOptimizer(
theano.gof.PatternSub(
(T.mul,'x', 'x'),
(T.sqr, 'x')))
ident_opt_0 = theano.gof.TopoOptimizer(
theano.gof.PatternSub(
(T.sqr, (T.sqrt,'x')),
'x',
allow_multiple_clients=True))
ident_opt_1 = theano.gof.TopoOptimizer(
theano.gof.PatternSub(
(T.sqrt, (T.sqr,'x')),
'x',
allow_multiple_clients=True))
ident_muldiv_0 = theano.gof.TopoOptimizer(
theano.gof.PatternSub(
(T.mul, 'x', (T.div,'y', 'x')),
'y',
allow_multiple_clients=True))
ident_muldiv_1 = theano.gof.TopoOptimizer(
theano.gof.PatternSub(
(T.mul, (T.div,'y', 'x'), 'x'),
'y',
allow_multiple_clients=True))
ident_muldiv_2 = theano.gof.TopoOptimizer(
theano.gof.PatternSub(
(T.div, (T.mul,'y', 'x'), 'x'),
'y',
allow_multiple_clients=True))
ident_muldiv_3 = theano.gof.TopoOptimizer(
theano.gof.PatternSub(
(T.div, (T.mul,'y', 'x'), 'y'),
'x',
allow_multiple_clients=True))
def __call__(self, env):
self.merge(env)
#eliminate identities
if 0:
print 'SKIPPING optimizations'
else:
self.ident_opt_0(env)
self.ident_opt_1(env)
self.ident_muldiv_0(env)
self.ident_muldiv_1(env)
self.ident_muldiv_2(env)
self.ident_muldiv_3(env)
self.gemm_opt_1(env)
self.sqr_opt_0(env)
self.merge(env)
def aa_fn(hid_fn, out_fn):
x = T.matrix() # input, target
w = T.matrix() # weights
a = T.vector() # hid bias
b = T.vector() # output bias
hid = hid_fn(T.dot(x, w) + a)
out = out_fn(T.dot(hid, w.T) + b)
err = 0.5 * T.sum((out - x)**2)
params = [w, a, b]
gparams = T.grad(err, params)
uparams = [T.sub_inplace(p, 0.01 * gp) for p, gp in zip(params, gparams)]
return theano.function([x, w, a, b], [err] + uparams
, linker = theano.gof.OpWiseCLinker()
#, linker = theano.gof.PerformLinker()
, optimizer = Opt()
)
aa_tanh_tanh = aa_fn(T.tanh, T.tanh)
neg, nout, nhid = [int(a) for a in sys.argv[1:]]
rng = numpy.random.RandomState(342)
x = (rng.rand(neg, nout)-0.5) * 1.5
w = rng.rand(nout, nhid)
a = rng.randn(nhid) * 0.0
b = rng.randn(nout) * 0.0
t = time.time()
for i in xrange(1000):
err_and_stuff = aa_tanh_tanh(x, w, a, b)
print time.time() - t, err_and_stuff[0]
......@@ -809,7 +809,7 @@ def ldflags():
try:
t0, t1, t2 = t[0:3]
assert t0 == '-'
except e:
except:
raise ValueError('invalid token in THEANO_BLAS_LDFLAGS', t)
if t1 == 'L':
raise ValueError('library dir not allowed in THEANO_BLAS_LDFLAGS', t)
......
......@@ -6,7 +6,7 @@ Defines Linkers that deal with C implementations.
# Python imports
from copy import copy
import md5
import re
import re #for set_compiledir
import os, sys, platform
# weave import
......@@ -19,45 +19,52 @@ import graph
import link
import utils
def set_compiledir(path=None):
"""Set the directory into which theano will compile code objects
def compile_dir():
"""Return the directory (name) in which scipy.weave should store code objects.
@param path: an absolute path or relative path. An argument of None will
trigger one of two default paths: firstly an environment variable called
'THEANO_COMPILEDIR' will be sought; failing that, an architecture-specific
directory will be chosen within $HOME/.theano.
If the environment variable THEANO_COMPILEDIR is set, its value is returned.
If not, a directory of the form $HOME/.theano/compiledir_<platform Id>.
@type path: string or None
As a test, this function touches the file __init__.py in the returned
directory, and raises OSError if there's a problem.
@return: None
The returned directory is created automatically using os.makedirs.
This directory is appended to the sys.path search path before being
returned, if the touch was successful.
@note: This function will create the path (recursively) as a folder if it
is not present, not readable, or not writable. New folders will be created
with mode 0700.
"""
if os.getenv('THEANO_COMPILEDIR'):
cachedir = os.getenv('THEANO_COMPILEDIR')
else:
# use (and possibly create) a default code cache location
platform_id = platform.platform() + '-' + platform.processor()
import re
platform_id = re.sub("[\(\)\s]+", "_", platform_id)
cachedir = os.path.join(os.getenv('HOME'), '.theano', 'compiledir_'+platform_id)
if not os.access(cachedir, os.R_OK | os.W_OK):
os.makedirs(cachedir, 7<<6) #read-write-execute for this user only
cachedir_init = cachedir+'/__init__.py'
# PROBLEM: sometimes touch returns -1 for no reason, the simple hack below
# solved the problem, but weird...
#touch = os.system('touch '+cachedir_init)
#if touch :
#raise OSError('touch %s returned %i' % (cachedir_init, touch))
hack = open(cachedir_init,'w')
hack.close()
if cachedir not in sys.path:
sys.path.append(cachedir)
return cachedir
# N.B. The path is stored as an attribute of this function
if path is None:
# we need to set the default, which can come from one of two places
if os.getenv('THEANO_COMPILEDIR'):
path = os.getenv('THEANO_COMPILEDIR')
else:
platform_id = platform.platform() + '-' + platform.processor()
platform_id = re.sub("[\(\)\s]+", "_", platform_id)
path = os.path.join(os.getenv('HOME'), '.theano', 'compiledir_'+platform_id)
if not os.access(path, os.R_OK | os.W_OK):
os.makedirs(path, 7<<6) #read-write-execute for this user only
# PROBLEM: sometimes the first approach based on os.system('touch')
# returned -1 for an unknown reason; the alternate approach here worked
# in all cases... it was weird.
open(os.path.join(path, '__init__.py'), 'w').close()
set_compiledir.compiledir = path
def get_compiledir():
"""Return the directory where theano code objects should be compiled
@rtype: string
"""
if not hasattr(set_compiledir, 'compiledir'):
set_compiledir()
return set_compiledir.compiledir
class CodeBlock:
......@@ -723,7 +730,8 @@ class CLinker(link.Linker):
instantiate.customize.add_library(lib)
mod.add_function(instantiate)
mod.compile(location = compile_dir())
#mod.compile(location = compile_dir())
mod.compile(location = get_compiledir())
module = __import__("%s" % (module_name), {}, {}, [module_name])
self.instantiate = module.instantiate
......
......@@ -395,7 +395,7 @@ class _tensor_py_operators:
def __iter__(self):
# This prevents accidental iteration via builtin.sum(self)
raise TypeError('Tensor does not support iteration. '
'Maybe you are using builtin.sum instead of theano.tensor.sum?')
'Maybe you are using builtin.sum instead of theano.tensor.sum? (Maybe .max?)')
......@@ -519,10 +519,18 @@ class MaxAndArgmax(Op):
def perform(self, node, (x, axis), (max, max_idx)):
max[0] = numpy.max(x, axis)
max_idx[0] = numpy.argmax(x, axis)
# def grad(self, (x, axis), (g_max, g_max_idx)):
# # This only works if axis is 0, else the max is broadcasted wrong in the call to eq
# g_x = eq(max(x, axis), x) * g_max
# return g_x, None
def grad(self, (x, axis), (g_max, g_max_idx)):
# @warning: This only works if axis is 0, else the max is
# broadcasted wrong in the call to eq.
# @note: This function should work correctly for L{vector}s.
# (x, y), (gz, gw)
# gz*dz/dx + gw*dw/dx, gz*dz/dy + gw*dw/dy
# gMax * dMax/dx + gArgMax * dArgMax/dx, gMax * dMax/daxis + gArgMax * dArgMax/daxis
# g_max has one less dimension than x, so you need to complete g_max to x's shape
# when axis=0 the broadcasting mechanism does it automatically
assert axis.data == 0
g_x = eq(max(x, axis), x) * g_max
return g_x, None
max_and_argmax = MaxAndArgmax()
......@@ -1016,6 +1024,13 @@ class Outer(Op):
outer = Outer()
class Gemm(Op):
"""
In-place generalization of matrix product (dot):
z = gemm(z,a,x,y,b)
with a,b scalars, is equivalent to
z = b*z + a*dot(x,y)
"""
E_rank = 'gemm only works for rank 2'
E_scalar = 'gemm requires scalar argument'
E_z_uniq = 'argument z aliased to x or y'
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论