提交 d653a636 authored 作者: desjagui@atchoum.iro.umontreal.ca's avatar desjagui@atchoum.iro.umontreal.ca

merge

aa.x : aa.cc
g++ -O3 -ffast-math aa.cc -o aa.x -L${PUB_PREFIX}/lib -lgsl -lcblas -lgoto -lgfortran -lm
g++ -O3 -ffast-math aa.cc -o aa.x -L${PUB_PREFIX}/lib -lgsl ${THEANO_BLAS_LDFLAGS}
clean :
rm aa.x
......@@ -28,6 +28,7 @@ int main(int argc, char **argv)
int neg = strtol(argv[1], 0, 0);
int nout = strtol(argv[2], 0, 0);
int nin = nout;
int nhid = strtol(argv[3], 0, 0);
int niter = strtol(argv[4], 0, 0);
double lr = 0.01;
......@@ -35,8 +36,8 @@ int main(int argc, char **argv)
gsl_rng_set(rng, 234);
gsl_matrix * x = gsl_matrix_alloc(neg, nout);
gsl_matrix * w = gsl_matrix_alloc(nout, nhid);
gsl_matrix * x = gsl_matrix_alloc(neg, nin);
gsl_matrix * w = gsl_matrix_alloc(nin, nhid);
gsl_vector * a = gsl_vector_alloc(nhid);
gsl_vector * b = gsl_vector_alloc(nout);
gsl_matrix * xw = gsl_matrix_alloc(neg, nhid);
......@@ -59,11 +60,17 @@ int main(int argc, char **argv)
struct timeval tv0, tv1;
struct timeval tdot0, tdot1;
double time_of_dot = 0.0;
gettimeofday(&tv0, 0);
double err = 0.0;
for (int iter = 0; iter < niter; ++iter)
{
gettimeofday(&tdot0, 0);
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, x, w, 0.0, xw);
gettimeofday(&tdot1, 0);
time_of_dot += pytime(&tdot1) - pytime(&tdot0);
for (int i = 0; i < neg; ++i)
for (int j = 0; j < nhid; ++j)
......@@ -72,7 +79,10 @@ int main(int argc, char **argv)
hid->data[i*nhid+j] = tanh(act);
}
gettimeofday(&tdot0, 0);
gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, hid, w, 0.0, hidwt);
gettimeofday(&tdot1, 0);
time_of_dot += pytime(&tdot1) - pytime(&tdot0);
for (int i = 0; i < nout; ++i) g_b->data[i] = 0.0;
err = 0.0;
......@@ -90,8 +100,11 @@ int main(int argc, char **argv)
if (1)
{
gettimeofday(&tdot0, 0);
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, g_hidwt, w, 0.0, g_hid);
gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, g_hidwt, hid, 0.0, g_w);
gettimeofday(&tdot1, 0);
time_of_dot += pytime(&tdot1) - pytime(&tdot0);
for (int i = 0; i < neg; ++i)
......@@ -101,14 +114,19 @@ int main(int argc, char **argv)
a->data[j] -= lr * g_hid->data[i*nhid+j];
}
gettimeofday(&tdot0, 0);
gsl_blas_dgemm(CblasTrans, CblasNoTrans, -lr, x, g_hid, 1.0, w);
gettimeofday(&tdot1, 0);
time_of_dot += pytime(&tdot1) - pytime(&tdot0);
for (int i = 0; i < nout*nhid; ++i) w->data[i] -= lr * g_w->data[i];
}
}
gettimeofday(&tv1, 0);
fprintf(stdout, "took = %lfs to get err %lf\n", pytime(&tv1) - pytime(&tv0), 0.5 * err);
double total_time = pytime(&tv1) - pytime(&tv0);
fprintf(stdout, "took = %lfs to get err %lf\n", total_time, 0.5 * err);
fprintf(stdout, "... of which %.2lfs was spent in dgemm (fraction: %.2lf)\n", time_of_dot, time_of_dot / total_time);
//skip freeing
return 0;
}
......
......@@ -8,7 +8,15 @@ import theano
import theano.tensor as T
import theano.sandbox
import theano.sandbox.wraplinker
from theano.compile import module
from theano.compile import module, Mode
from theano.sandbox.wraplinker import ProfileMode
from theano import gof, Op, Apply
from theano.tensor import blas, opt
# numpy: aa_numpy.py
# c : aa.cc
if 0:
class Opt(object):
......@@ -130,32 +138,29 @@ if 0:
self.merge(env)
def linker(print_prog=False):
if 1:
print 'wtf?'
#return theano.gof.OpWiseCLinker()
imap = {None:'-'}
def blah(i, node, thunk):
imap[node] = str(i)
if print_prog:# and node.op.__class__ is T.DimShuffle:
if False and node.op == T.DimShuffle((), ['x', 'x'], inplace = True):
print node.op == T.DimShuffle((), ['x', 'x'], inplace = True),
print node.inputs[0], type(node.inputs[0]),
print node.inputs[0].equals(T.constant(2)),
outputs = node.outputs
inputs = theano.gof.graph.inputs(outputs)
print 'node ', i, node,
print ':'.join([imap[inp.owner] for inp in node.inputs])
#print theano.sandbox.pprint.pp.process_graph(inputs, outputs)
return theano.sandbox.wraplinker.WrapLinkerMany(
[theano.gof.OpWiseCLinker()],
[theano.sandbox.wraplinker.run_all
,blah
#,theano.sandbox.wraplinker.numpy_notall_isfinite
])
else:
return theano.gof.OpWiseCLinker()
def print_graph_linker(print_prog=True):
if 1:
imap = {None:'-'}
def blah(i, node, thunk):
imap[node] = str(i)
if print_prog:# and node.op.__class__ is T.DimShuffle:
if False and node.op == T.DimShuffle((), ['x', 'x'], inplace = True):
print node.op == T.DimShuffle((), ['x', 'x'], inplace = True),
print node.inputs[0], type(node.inputs[0]),
print node.inputs[0].equals(T.constant(2)),
outputs = node.outputs
inputs = theano.gof.graph.inputs(outputs)
print 'node ', i, node,
print ':'.join([imap[inp.owner] for inp in node.inputs])
#print theano.sandbox.pprint.pp.process_graph(inputs, outputs)
return theano.sandbox.wraplinker.WrapLinkerMany(
[theano.gof.OpWiseCLinker()],
[theano.sandbox.wraplinker.run_all
,blah
#,theano.sandbox.wraplinker.numpy_notall_isfinite
])
else:
return theano.gof.OpWiseCLinker()
class M(module.Module):
......@@ -167,11 +172,14 @@ class M(module.Module):
self.a = module.Member(T.vector('a')) # hid bias
self.b = module.Member(T.vector('b')) # output bias
hid = T.tanh(T.dot(x, self.w) + self.a)
self.hid = T.tanh(T.dot(x, self.w) + self.a)
hid = self.hid
out = T.tanh(T.dot(hid, self.w.T) + self.b)
self.out = T.tanh(T.dot(hid, self.w.T) + self.b)
out = self.out
err = 0.5 * T.sum((out - x)**2)
self.err = 0.5 * T.sum((out - x)**2)
err = self.err
params = [self.w, self.a, self.b]
......@@ -182,7 +190,13 @@ class M(module.Module):
self.step = module.Method([x], err, updates=dict(updates))
mod = M()
m = mod.make(mode='FAST_RUN')
mode = 'FAST_RUN'
#mode = ProfileMode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker())
mode = Mode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker(nice_errors=True))
mode = Mode(optimizer='fast_run', linker='c')
mode = Mode(optimizer='fast_run', linker='c|py')
print mod.pretty(mode=mode)
m = mod.make(mode=mode)
neg, nout, nhid, niter = [int(a) for a in sys.argv[1:]]
rng = numpy.random.RandomState(342)
......@@ -196,4 +210,10 @@ t = time.time()
for i in xrange(niter):
err = m.step(x)
print 'time: ',time.time() - t, 'err: ', err
try:
mode.print_summary()
pass
except:
pass
#!/usr/bin/env python2.5
# Pure-numpy benchmark of a one-hidden-layer tied-weight autoencoder trained
# by plain gradient descent.  Companion to the C/GSL version (aa.cc): both
# report total wall-clock time plus the fraction spent inside matrix products,
# so the two implementations can be compared.
# Usage: aa_numpy.py <neg> <nout> <nhid> <niter>
from __future__ import absolute_import
import numpy as N
import sys
import time

# c: aa.cc

# neg: number of examples; nout: input/output width (nin == nout);
# nhid: hidden-layer width; niter: number of training iterations.
neg, nout, nhid, niter = [int(a) for a in sys.argv[1:]]

lr = 0.01  # fixed learning rate

rng = N.random.RandomState(342)  # fixed seed for reproducible timings

w = rng.rand(nout, nhid)      # weight matrix, tied: used as w (encode) and w.T (decode)
a = rng.randn(nhid) * 0.0     # hidden bias, initialized to zero
b = rng.randn(nout) * 0.0     # output bias, initialized to zero
x = (rng.rand(neg, nout)-0.5) * 1.5  # input batch, uniform in roughly [-0.75, 0.75]

dot_time = 0.0  # accumulated wall-clock time spent inside N.dot calls only
t = time.time()
for i in xrange(niter):
    # forward pass: hidden activations
    tt = time.time()
    d = N.dot(x, w)
    dot_time += time.time() - tt
    hid = N.tanh(d + a)
    # forward pass: reconstruction through the transposed (tied) weights
    tt = time.time()
    d = N.dot(hid, w.T)
    dot_time += time.time() - tt
    out = N.tanh(d + b)
    # squared-error reconstruction cost
    g_out = out - x
    err = 0.5 * N.sum(g_out**2)
    # backprop through the output tanh (tanh' = 1 - tanh^2)
    g_hidwt = g_out * (1.0 - out**2)
    b -= lr * N.sum(g_hidwt, axis=0)
    tt = time.time()
    g_hid = N.dot(g_hidwt, w)
    dot_time += time.time() - tt
    # backprop through the hidden tanh
    g_hidin = g_hid * (1.0 - hid**2)
    # the weight gradient has two terms because w is used in both the
    # decoder (g_hidwt.T . hid) and the encoder (x.T . g_hidin)
    tt = time.time()
    d = N.dot(g_hidwt.T, hid)
    dd = N.dot(x.T, g_hidin)
    dot_time += time.time() - tt
    gw = (d + dd)
    w -= lr * gw
    a -= lr * N.sum(g_hidin, axis=0)
total_time = time.time() - t
print 'time: ',total_time, 'err: ', err
print ' of which', dot_time, 'was spent on dot. Fraction:', dot_time / total_time
......@@ -89,8 +89,9 @@ Get the source and run the tests like this:
.. code-block:: bash
hg clone http://pylearn.org/hg/theano theano
cd theano
hg clone http://pylearn.org/hg/theano Theano
ln -s Theano/theano <someplace on your PYTHONPATH>/theano
cd Theano
nosetests
To update your library to the latest on pylearn.org, change directory (`cd`) to this `theano` folder and type
......
......@@ -664,6 +664,10 @@ class ComponentList(Composite):
return self.__class__(*[c.dup() for c in self._components])
def default_initialize(self, init = {}, **kwinit):
for k, initv in dict(init, **kwinit).iteritems():
self[k] = initv
class ComponentDictInstance(CompositeInstance):
"""
ComponentDictInstance is meant to be instantiated by ComponentDict.
......
......@@ -23,11 +23,12 @@ from op import \
from opt import \
Optimizer, optimizer, SeqOptimizer, \
MergeOptimizer, MergeOptMerge, \
LocalOptimizer, local_optimizer, LocalOptGroup, LocalOpKeyOptGroup, \
LocalOptimizer, local_optimizer, LocalOptGroup, \
OpSub, OpRemove, PatternSub, \
NavigatorOptimizer, TopoOptimizer, OpKeyOptimizer, EquilibriumOptimizer, \
NavigatorOptimizer, TopoOptimizer, EquilibriumOptimizer, \
keep_going, warn, \
InplaceOptimizer, PureThenInplaceOptimizer
#LocalOpKeyOptGroup, OpKeyOptimizer
from optdb import \
DB, Query, \
......
......@@ -686,7 +686,16 @@ class CLinker(link.Linker):
instantiate.customize.add_support_code(support_code)
instantiate.customize.add_support_code(self.struct_code)
instantiate.customize.add_support_code(static)
instantiate.customize.add_extra_compile_arg("-w")
for extra_arg in (
"-O2",
"-ffast-math",
#"-fprefetch-loop-arrays",
#"-ftree-vect-loop-version",
#"-ftree-loop-optimize",
#"-ftree-vectorize"):
"-w" #-w means supress all warnings
):
instantiate.customize.add_extra_compile_arg(extra_arg)
for arg in self.compile_args():
instantiate.customize.add_extra_compile_arg(arg)
for header in self.headers():
......@@ -739,6 +748,7 @@ def _execute(cthunk, init_tasks, tasks, error_storage):
exc_value = exc_type(_exc_value, task)
exc_value.__thunk_trace__ = trace # this can be used to retrieve the location the Op was declared
raise exc_type, exc_value, exc_trace
execute.cthunk = cthunk
return execute
......@@ -761,9 +771,12 @@ class OpWiseCLinker(link.LocalLinker):
__cache__ = {}
def __init__(self, fallback_on_perform = True):
def __init__(self,
fallback_on_perform = True,
nice_errors = True):
self.env = None
self.fallback_on_perform = fallback_on_perform
self.nice_errors = nice_errors
def accept(self, env, no_recycling = []):
if self.env is not None and self.env is not env:
......@@ -833,7 +846,9 @@ class OpWiseCLinker(link.LocalLinker):
else:
no_recycling = [storage_map[r] for r in no_recycling if r not in env.inputs]
f = link.streamline(env, thunks, order, no_recycling = no_recycling, profiler = profiler)
f = link.streamline(env, thunks, order,
no_recycling = no_recycling,
nice_errors = self.nice_errors)
return f, [link.Container(input, storage) for input, storage in zip(env.inputs, input_storage)], \
[link.Container(output, storage, True) for output, storage in zip(env.outputs, output_storage)], \
......@@ -841,7 +856,6 @@ class OpWiseCLinker(link.LocalLinker):
def _default_checker(x, y):
"""WRITEME
Default checker for DualLinker. This checks that the
......
......@@ -13,6 +13,7 @@ from collections import deque
import utils
_creation_idx = [0]
class Apply(utils.object2):
"""
......@@ -121,6 +122,13 @@ class Apply(utils.object2):
def __asapply__(self):
return self
def __hash__(self):
if not hasattr(self, '_creation_idx'):
self._creation_idx = _creation_idx[0]
_creation_idx[0] += 1
return self._creation_idx
def clone(self):
"""Duplicate this Apply instance with inputs = self.inputs.
......@@ -567,7 +575,10 @@ def general_toposort(r_out, deps, debug_print = False):
deps(i) should behave like a pure function (no funny business with internal state)
:note:
deps(i) can/should be cached by the deps function to be fast
deps(i) will be cached by this function (to be fast)
:note:
The order of the return value list is determined by the order of nodes returned by the deps() function.
"""
deps_cache = {}
def _deps(io):
......@@ -611,8 +622,9 @@ def general_toposort(r_out, deps, debug_print = False):
def io_toposort(i, o, orderings = {}):
"""WRITEME
"""
#the inputs are used only here in the function that decides what 'predecessors' to explore
iset = set(i)
def deps(obj):
def deps(obj):
rval = []
if obj not in iset:
if isinstance(obj, Result):
......
......@@ -5,6 +5,7 @@ from type import Type
import sys, traceback
from copy import copy
from cutils import run_cthunk
__excepthook = sys.excepthook
......@@ -225,9 +226,27 @@ def clear_storage_thunk(stg):
thunk.inputs = [stg]
return thunk
def streamline(env, thunks, order, no_recycling = [], profiler = None):
"""WRITEME"""
if profiler is None:
def streamline(env, thunks, order, no_recycling = [], profiler = None, nice_errors = True):
"""WRITEME
:param env:
:param thunks: the list of program instructions
:param order: the list of apply instances that gave rise to the thunks (same order as thunks)
:param no_recycling: storage elements that cannot be 'recycled' by repeatedly executing the
program. These storage elements are cleared before re-running.
:param profiler: deprecated
:param nice_errors: run in such a way that the double-traceback is printed. This costs a
bit of performance in the inner python loop.
"""
if profiler is not None:
raise NotImplementedError()
if nice_errors:
def f():
for x in no_recycling:
x[0] = None
......@@ -237,14 +256,13 @@ def streamline(env, thunks, order, no_recycling = [], profiler = None):
except:
raise_with_op(node)
else:
# don't worry about raise_with_op, just go a little faster.
#there is a mix of python and c thunks
def f():
for x in no_recycling:
x[0] = None
def g():
for thunk, node in zip(thunks, order):
profiler.profile_node(thunk, node)
profiler.profile_env(g, env)
f.profiler = profiler
for thunk in thunks:
thunk()
return f
class LocalLinker(Linker):
......
差异被折叠。
......@@ -4,16 +4,31 @@ import opt
class DB(object):
def __hash__(self):
if not hasattr(self, '_optimizer_idx'):
self._optimizer_idx = opt._optimizer_idx[0]
opt._optimizer_idx[0] += 1
return self._optimizer_idx
def __init__(self):
self.__db__ = defaultdict(set)
self._names = set()
def register(self, name, obj, *tags):
# N.B. obj is not an instance of class Optimizer.
# It is an instance of a DB.In the tests for example,
# this is not always the case.
if not isinstance(obj, (DB, opt.Optimizer, opt.LocalOptimizer)):
raise Exception('wtf', obj)
obj.name = name
if name in self.__db__:
raise ValueError('The name of the object cannot be an existing tag or the name of another existing object.', obj, name)
self.__db__[name] = set([obj])
self._names.add(name)
for tag in tags:
if tag in self._names:
raise ValueError('The tag of the object collides with a name.', obj, tag)
self.__db__[tag].add(obj)
def __query__(self, q):
......
if 0:
class _EquilibriumOptimizer(NavigatorOptimizer):
def __init__(self,
local_optimizers,
failure_callback = None,
max_depth = None,
max_use_ratio = None):
super(EquilibriumOptimizer, self).__init__(
None,
ignore_newtrees = False,
failure_callback = failure_callback)
self.local_optimizers = local_optimizers
self.max_depth = max_depth
self.max_use_ratio = max_use_ratio
self.tracks = defaultdict(list)
self.tracks0 = defaultdict(list)
max_depth = 0
for lopt in local_optimizers:
tracks = lopt.tracks()
for track in tracks:
max_depth = max(max_depth, len(track))
if self.max_depth is not None and max_depth > self.max_depth:
raise ValueError('One of the local optimizers exceeds the maximal depth.')
for i, op in enumerate(track):
if i == 0:
self.tracks0[op].append((track, i, lopt))
self.tracks[op].append((track, i, lopt))
def fetch_tracks(self, op):
return self.tracks[op] + self.tracks[None]
def fetch_tracks0(self, op):
return self.tracks0[op] + self.tracks0[None]
def backtrack(self, node, tasks):
candidates = self.fetch_tracks(node.op)
tracks = []
def filter(node, depth):
new_candidates = []
for candidate in candidates:
track, i, lopt = candidate
if i < depth:
pass
elif track[i-depth] in (None, node.op):
if i == depth:
tasks[node].append(lopt)
else:
tracks.append(candidate)
else:
new_candidates.append(candidate)
return new_candidates
depth = 0
nodes = [node]
while candidates:
for node in nodes:
candidates = filter(node, depth)
depth += 1
_nodes = nodes
nodes = reduce(list.__iadd__,
[reduce(list.__iadd__,
[[n for n, i in out.clients if not isinstance(n, str)] for out in node.outputs],
[]) for node in nodes],
[])
candidates = tracks
tracks = []
def apply(self, env):
tasks = defaultdict(list)
if self.max_use_ratio is not None:
max_uses = self.max_use_ratio * len(env.nodes)
runs = defaultdict(int)
else:
runs = None
def importer(node):
#print 'IMPORTING', node
self.backtrack(node, tasks)
def pruner(node):
try:
del tasks[node]
except KeyError:
pass
def chin(node, i, r, new_r):
if new_r.owner and not r.clients:
self.backtrack(new_r.owner, tasks)
# # == NOT IDEAL == #
# for node in env.nodes:
# importer(node)
for node in env.toposort():
tasks[node].extend(lopt for track, i, lopt in self.fetch_tracks0(node.op))
u = self.attach_updater(env, importer, pruner, chin)
print 'KEYS', map(hash, tasks.keys())
while tasks:
for node in tasks.iterkeys():
todo = tasks.pop(node)
break
for lopt in todo:
if runs is not None and runs[lopt] >= max_uses:
print >>sys.stderr, 'Warning: optimization exceeded its maximal use ratio: %s, %s' % (lopt, max_uses)
continue
success = self.process_node(env, node, lopt)
if success:
if runs is not None: runs[lopt] += 1
break
self.detach_updater(env, u)
# def match(self, node, candidates):
# candidates[:] = [candidate
# for candidate in candidates
# if candidate.current.op is None or candidate.current.op == node.op]
# for candidate in candidates:
# if candidate.current.inputs is not None:
# for in1, in2 in zip(candidate.current.inputs, node.inputs):
# if isinstance(in1, str):
# candidate.match[in1] = in2
# for client in node.clients:
# op = node.op
# patterns = self.pattern_base[(depth, op)].union(self.pattern_base[(depth, WILDCARD)])
# if not patterns:
# return patterns
# return self.match(node, depth + 1).intersection(patterns)
# def backtrack(self, node, q):
# for node2, i in node.clients:
# op2 = node2.op
......@@ -375,7 +375,7 @@ class TestEquilibrium(object):
x, y, z = map(MyResult, 'xyz')
e = op3(op4(x, y))
g = Env([x, y, z], [e])
print g
print 'before', g
sys.stderr = sys.stdout # display pesky warnings along with stdout
opt = EquilibriumOptimizer(
[PatternSub((op1, 'x', 'y'), (op2, 'x', 'y')),
......@@ -384,7 +384,7 @@ class TestEquilibrium(object):
],
max_use_ratio = 1. / len(g.nodes)) # each opt can only be applied once
opt.optimize(g)
print g
print 'after', g
assert str(g) == '[Op4(x, y)]'
......
from theano.gof.optdb import *
from unittest import TestCase


class Test_DB(TestCase):
    """Tests for the optimization database's registration rules (optdb.DB)."""

    def test_0(self):
        # DB.register only accepts DB / Optimizer / LocalOptimizer instances;
        # subclassing opt.Optimizer also provides the __hash__ that DB needs.
        class Opt(opt.Optimizer): #inheritance buys __hash__
            name = 'blah'

        db = DB()
        db.register('a', Opt())
        db.register('b', Opt())
        db.register('c', Opt(), 'z', 'asdf')  # 'z' and 'asdf' are tags

        # Re-using an existing name must raise ValueError ("The name ...").
        try:
            db.register('c', Opt()) #name taken
            self.fail()
        except ValueError, e:
            if e[0].startswith("The name"):
                pass
            else:
                raise
        except:
            self.fail()

        # A name that collides with an existing tag must also raise
        # the same "The name ..." ValueError.
        try:
            db.register('z', Opt()) #name collides with tag
            self.fail()
        except ValueError, e:
            if e[0].startswith("The name"):
                pass
            else:
                raise
        except:
            self.fail()

        # A fresh name whose *tag* collides with an existing name must raise
        # the "The tag ..." ValueError.
        try:
            db.register('u', Opt(), 'b') #name new but tag collides with name
            self.fail()
        except ValueError, e:
            if e[0].startswith("The tag"):
                pass
            else:
                raise
        except:
            self.fail()
......@@ -2,6 +2,7 @@ import gof #, gof.result
import numpy #for numeric_grad
from gof.python25 import all
import gof.utils
_msg_retType = 'op.grad(...) returned a non-list'
_msg_badlen = 'op.grad(...) returned wrong number of gradients'
......@@ -55,17 +56,17 @@ def grad_sources_inputs(sources, graph_inputs):
else:
gmap[r] = g_r
graph_outputs = gmap.keys()
graph_outputs = gof.utils.uniq([r for r,g in sources])
if graph_inputs is None:
graph_inputs = gof.graph.inputs(graph_outputs)
for node in gof.graph.io_toposort(graph_inputs, graph_outputs).__reversed__():
g_outputs = [gmap.get(o,None) for o in node.outputs]
#if all output gradients are None, continue
if all(map(lambda x:x is None, g_outputs)): continue
output_arg = g_outputs
input_arg = node.inputs
......
......@@ -235,17 +235,27 @@ class PPrinter:
else:
raise TypeError('Not enough arguments to call.')
special = dict(middle_dot = u"\u00B7",
big_sigma = u"\u03A3")
greek = dict(alpha = u"\u03B1",
beta = u"\u03B2",
gamma = u"\u03B3",
delta = u"\u03B4",
epsilon = u"\u03B5")
use_ascii = True
if use_ascii:
special = dict(middle_dot = "\dot",
big_sigma = "\Sigma")
greek = dict(alpha = "\alpha",
beta = "\beta",
gamma = "\gamma",
delta = "\delta",
epsilon = "\epsilon")
else:
special = dict(middle_dot = u"\u00B7",
big_sigma = u"\u03A3")
greek = dict(alpha = u"\u03B1",
beta = u"\u03B2",
gamma = u"\u03B3",
delta = u"\u03B4",
epsilon = u"\u03B5")
pprint = PPrinter()
......
......@@ -2,6 +2,7 @@ from __future__ import absolute_import
import time
import numpy
from ..gof.cutils import run_cthunk
from ..gof.link import WrapLinker
from ..compile.mode import Mode
......@@ -103,49 +104,82 @@ def DualLinker(linkers):
class ProfileMode(Mode):
def __init__(self, local_linker, optimizer=None):
def __init__(self, linker, optimizer=None):
local_time = [0.0]
apply_time = {}
op_time = {}
op_cimpl = {}
def blah(i, node, *thunks):
t0 = time.time()
for th in thunks:
th()
dt = time.time() - t0
if 0:
t0 = time.time()
for th in thunks:
th()
dt = time.time() - t0
elif 0: #more precise timing
for th in thunks:
t0 = time.time()
th()
dt = time.time() - t0
elif 1:
for th in thunks:
if hasattr(th, 'cthunk'):
t0 = time.time()
run_cthunk(th.cthunk)
dt = time.time() - t0
else:
t0 = time.time()
th()
dt = time.time() - t0
elif 1:
pass
else:
raise Exception('one of the cases has to run the thunks!')
local_time[0] += dt
apply_time[(i,node.op)] = apply_time.get((i,node.op), 0.0) + dt
op_time[node.op] = op_time.get(node.op, 0.0) + dt
op_cimpl[node.op] = hasattr(thunks[0], 'cthunk')
self.local_time = local_time
self.apply_time = apply_time
self.op_time = op_time
self.op_cimpl = op_cimpl
linker = WrapLinkerMany([local_linker], [blah])
wrap_linker = WrapLinkerMany([linker], [blah])
if optimizer:
Mode.__init__(self, linker, optimizer)
super(ProfileMode, self).__init__(wrap_linker, optimizer)
else:
Mode.__init__(self, linker)
super(ProfileMode, self).__init__(wrap_linker)
def print_summary(self):
local_time = self.local_time[0]
apply_time = self.apply_time
op_time = self.op_time
print 'local_time', local_time
print 'apply-wise times'
print ''
print 'ProfileMode.print_summary()'
print '---------------------------'
print ''
print 'local_time', local_time, '(Time spent running thunks)'
print 'Apply-wise summary: <fraction of local_time spent at this position> (<Apply position>, <Apply Op name>)'
atimes = [(t/local_time, (a[0], str(a[1]))) for a, t in apply_time.items()]
atimes.sort()
atimes.reverse()
for t,a in atimes[:15]:
print ' ', t, a
print ' ...' #show that we are ignoring applies that don't take much time
print 'op-wise times'
otimes = [(t/local_time, a) for a, t in op_time.items()]
print '\t%.3f\t%i\t%s' % (t, a[0], a[1])
print ' ... (remaining %i Apply instances account for %.2f of the runtime)'\
%(max(0, len(atimes)-15), sum(t for t, a in atimes[15:]))
n_ops_to_print = 20
print 'Op-wise summary: <fraction of local_time spent on this kind of Op> <Op name>'
otimes = [(t/local_time, a, self.op_cimpl[a]) for a, t in op_time.items()]
otimes.sort()
otimes.reverse()
for t,a in otimes[:15]:
print ' ', t, a
print ' ...' #show that we are ignoring applies that don't take much time
print sum(t for a,t in op_time.items())
for t,a,ci in otimes[:n_ops_to_print]:
print '\t%.3f\t%s %s' % (t, '*' if ci else ' ', a)
print ' ... (remaining %i Ops account for %.2f of the runtime)'\
%(max(0, len(otimes)-n_ops_to_print), sum(t for t, a, ci in
otimes[n_ops_to_print:]))
print '(*) Op is running a c implementation'
......@@ -2,6 +2,7 @@
from basic import *
import opt
import blas
import raw_random
from raw_random import \
......
差异被折叠。
差异被折叠。
差异被折叠。
......@@ -103,16 +103,18 @@ class DimShuffle(Op):
for i, b in enumerate(input_broadcastable):
if i not in new_order:
# we want to drop this dimension because it's not a value in new_order
if b == 1:
if b == 1: # 1 aka True
self.drop.append(i)
else:
# we cannot drop non-broadcastable dimensions
raise NotImplementedError("You cannot drop a non-broadcastable dimension.")
raise ValueError("You cannot drop a non-broadcastable dimension.")
else:
i2j[i] = j
j += 1
# transposition of non-broadcastable dimensions
# This is how the dimensions will be permuted, without accounting for the extra
# 'x' broadcastable dimensions to insert.
self.shuffle = [i2j[x] for x in new_order if x != 'x']
# list of dimensions of the output that are broadcastable and were not in the original input
......@@ -144,7 +146,8 @@ class DimShuffle(Op):
and self.input_broadcastable == other.input_broadcastable
def __hash__(self):
return hash(self.inplace) ^ hash(self.new_order) ^ hash(self.input_broadcastable)
return hash(type(self)) ^ hash(self.inplace) \
^ hash(self.new_order) ^ hash(self.input_broadcastable)
def __str__(self):
if self.inplace:
......@@ -175,13 +178,78 @@ class DimShuffle(Op):
storage[0] = res
def c_code(self, node, name, (input,), (res,), sub):
def statements(lst):
return ';\n'.join(lst) + ';'
nd_in = len(self.input_broadcastable)
nd_out = len(self.new_order)
check_input_nd = [('if (%(input)s->nd != ' + str(nd_in) + ')'
'{PyErr_SetString(PyExc_NotImplementedError, "input nd"); %(fail)s;}')]
clear_output = ['if (%(res)s) {Py_XDECREF(%(res)s);}']
shape_statements = ['npy_intp dimensions[%i]'%nd_out]
shape_statements += [('dimensions['+str(i)+'] = %(input)s->dimensions['+str(o)+']')
if o != 'x' else
('dimensions['+str(i)+'] = 1')
for i, o in enumerate(self.new_order)]
strides_statements = ['npy_intp strides[%i]'%nd_out]
strides_statements += [('strides['+str(i)+'] = %(input)s->strides['+str(o)+']')
if o != 'x' else
('strides['+str(i)+'] = 0')
for i, o in enumerate(self.new_order)]
if self.inplace:
get_base = ['{ PyArrayObject * base = %(input)s', 'Py_INCREF((PyObject*)base)']
else:
get_base = [('{ PyArrayObject * base = (PyArrayObject*)PyArray_FromAny((PyObject*)%(input)s, NULL,'
'0, 0, NPY_ALIGNED|NPY_ENSURECOPY, NULL)')]
alloc_output = [('%(res)s = (PyArrayObject*)PyArray_New(&PyArray_Type, '
'' + str(nd_out) + ', dimensions, '
'PyArray_TYPE(base), strides, '
'base->data, base->descr->elsize, '
'PyArray_FLAGS(base), NULL)'),
'%(res)s->base = (PyObject*)base',
'}']
full_code = statements(check_input_nd
+ clear_output
+ shape_statements
+ strides_statements
+ get_base
+ alloc_output)
if 0:
print 'C_CODE'
print ''
print self
print "IN BROAD", self.input_broadcastable
print "NEW ORDER", self.new_order
print "SHUFFLE", self.shuffle
print "AUGMENT", self.augment
print '------------'
print ''
print full_code
if 0:
import sys
sys.exit()
return full_code % dict(locals(), **sub)
def grad(self, (x, ), (gz, )):
gz = as_tensor(gz)
grad_order = ['x'] * len(x.type.broadcastable)
for i, v in enumerate(self.new_order):
if v != 'x':
grad_order[v] = i
return DimShuffle(gz.type.broadcastable, grad_order)(gz),
return [DimShuffle(gz.type.broadcastable, grad_order, inplace=True)(Elemwise(scalar.identity)(gz))]
......
from basic import _scal_elemwise, _transpose_inplace
from .basic import _scal_elemwise #, _transpose_inplace
from .. import scalar as scal
import elemwise
from .. import printing
......@@ -183,9 +183,11 @@ pprint.assign(div_inplace, printing.OperatorPrinter('/=', -1, 'left'))
pprint.assign(pow_inplace, printing.OperatorPrinter('**=', 1, 'right'))
transpose_inplace = _transpose_inplace
"""WRITEME"""
def transpose_inplace(x, **kwargs):
"""Perform a transpose on a tensor without copying the underlying storage"""
dims = range(x.ndim-1, -1, -1)
return elemwise.DimShuffle(x.broadcastable, dims, inplace=True)(x)
pprint.assign(transpose_inplace, printing.MemberPrinter('T'))
#pprint.assign(transpose_inplace, printing.MemberPrinter('T'))
......@@ -203,6 +203,7 @@ class SoftmaxWithBias(gof.Op):
for (j = 0; j < Nx[1]; ++j)
{
double row_ij = x_i[j * Sx] + b_i[j * Sb];
// std::cout << "1" << row_ij << "\\n";
row_max_j = (row_ij > row_max) ? j : row_max_j;
row_max = (row_ij > row_max) ? row_ij : row_max;
}
......@@ -210,13 +211,23 @@ class SoftmaxWithBias(gof.Op):
for (j = 0; j < Nx[1]; ++j)
{
double row_ij = x_i[j * Sx] + b_i[j * Sb];
// std::cout << "2" << row_ij << "\\n";
double sm_ij = exp(row_ij - row_max);
// std::cout << "3" << sm_ij << "\\n";
sum += sm_ij;
sm_i[j * Ssm] = sm_ij;
}
if ( (0.0 == sum) || (std::isinf(sum)))
if (std::isinf(sum))
{
//that was our best...
PyErr_SetString(PyExc_ValueError, "softmax is impossible (inf)!");
%(fail)s;
}
if (0.0 == sum)
{
//that was our best...
PyErr_SetString(PyExc_ValueError, "softmax is impossible (zero)!");
%(fail)s;
}
......@@ -600,6 +611,7 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
}
if (y_i >= %(dx)s->dimensions[1])
{
PyErr_SetString(PyExc_ValueError, "y_i >= dx dimensions[1]");
%(fail)s;
}
dx_i[y_i * Sdx] -= dnll_i;
......
"""Tensor optimizations addressing the ops in basic.py
"""
# TODO: intelligent merge for mul/add
# TODO: 0*x -> 0
......@@ -30,28 +31,6 @@ def in2out(*local_opts, **kwargs):
**kwargs)
# gemm: (d,a,b,c,s) -> d = d*s + a*dot(b,c)
# Transforms d -= a * dot(b, c) into gemm(d, -a, b, c, 1.0)
gemm_pattern_1 = gof.PatternSub((T.sub,
'd',
(T.mul,
dict(pattern = (T.DimShuffle((), ['x', 'x'], inplace = True), 'a'),
allow_multiple_clients = True),
(T.dot, 'b', 'c'))),
(T.gemm, 'd', (T.neg, 'a'), 'b', 'c', T.constant(1.0)),
allow_multiple_clients = False)
# gemm: (d,a,b,c,s) -> d = d*s + a*dot(b,c)
# Transforms dot(a, b) into gemm(zeros(2)(hstack(shape(a)[:1], shape(b)[1:])), 1.0, a, b, 1.0)
# The construction of the 'gemm' node may fail if, for example, a and b are not both matrices.
dot_to_gemm = gof.PatternSub((T.dot, 'a', 'b'),
(T.gemm, (T.Zeros(2),
(T.stack,
(T.Subtensor([slice(0, 1)]), (T.shape, 'a')),
(T.Subtensor([slice(1, 2)]), (T.shape, 'b')))),
T.constant(1.0), 'a', 'b', T.constant(1.0)),
allow_multiple_clients = False)
def _insert_inplace_optimizer(env):
"""
......@@ -91,12 +70,6 @@ def _insert_inplace_optimizer(env):
break
insert_inplace_optimizer = gof.optimizer(_insert_inplace_optimizer)
inplace_optimizer = gof.InplaceOptimizer(
gof.SeqOptimizer(out2in(gemm_pattern_1),
insert_inplace_optimizer,
failure_callback = gof.warn))
compile.optdb.register('inplace_opt', inplace_optimizer, 99, 'fast_run', 'inplace')
def register_canonicalize(lopt, *tags, **kwargs):
name = (kwargs and kwargs.pop('name')) or lopt.__name__
......@@ -216,6 +189,13 @@ register_canonicalize(local_shape_lift_dot)
################
def encompasses_broadcastable(b1, b2):
"""
Returns True if the broadcastable patterns b1 and b2 are such that b2 is
broadcasted to b1's shape and not the opposite.
:param b1: the broadcastable attribute of a tensor type
:param b2: the broadcastable attribute of a tensor type
"""
if len(b1) < len(b2):
return False
b1 = b1[-len(b2):]
......@@ -330,6 +310,7 @@ def local_fill_cut(node):
register_canonicalize(local_fill_cut)
register_canonicalize(gof.OpRemove(T.tensor_copy), name='remove_tensor_copy' )
@gof.local_optimizer([None, T.fill])
def local_fill_sink(node):
......@@ -524,9 +505,30 @@ class Canonizer(gof.LocalOptimizer):
return False
new = self.merge_num_denum(num, denum)
if new.type != out.type:
if new.dtype != out.dtype:
#new = T.fill(out, new)
new = T.fill(out, T.Elemwise(scalar.Identity(scalar.specific_out(getattr(scalar, out.type.dtype))))(new))
elem_op = T.Elemwise(scalar.Identity(scalar.specific_out(getattr(scalar, out.type.dtype))))
new = T.fill(out, elem_op(new))
if new.broadcastable != out.broadcastable:
#this case is tricky... we need to provide exactly the same kind of broadcastable
#pattern, but only if legal...
dlen = len(new.broadcastable) - len(out.broadcastable)
if dlen > 0:
#try to take the leading ranks of new.broadcastable, which should be broadcastable
# ranks
#if this means skipping over nonbroadcastable ranks, then DimShuffle will fail
dimshuffle_op = T.DimShuffle(new.broadcastable,
range(dlen, len(new.broadcastable)))
new = dimshuffle_op(new)
elif dlen < 0:
#we have to boost up a scalar or something
dimshuffle_op = T.DimShuffle(new.broadcastable,
['x' for x in range(-dlen)] + range(0, len(new.broadcastable)))
new = dimshuffle_op(new)
# if our if's above worked, this should be true. OTW investigate.
assert new.type == out.type
return [new]
def __str__(self):
......@@ -550,6 +552,7 @@ def local_neg_to_mul(node):
return [-1 * node.inputs[0]]
else:
return False
register_canonicalize(local_neg_to_mul)
@gof.local_optimizer([T.mul])
def local_mul_to_neg(node):
......@@ -557,6 +560,7 @@ def local_mul_to_neg(node):
return [-local_mul_canonizer.merge_num_denum(node.inputs[1:], [])]
else:
return False
register_specialize(local_mul_to_neg)
@gof.local_optimizer([T.div])
def local_div_to_inv(node):
......@@ -564,10 +568,120 @@ def local_div_to_inv(node):
return [T.inv(local_mul_canonizer.merge_num_denum(node.inputs[1:], []))]
else:
return False
# NOTE(review): these registrations duplicate the ones appearing immediately
# after each optimizer's definition above (merge/diff residue). Registering
# the same local optimizer twice is redundant — verify and deduplicate.
register_canonicalize(local_neg_to_mul)
register_specialize(local_mul_to_neg)
register_specialize(local_div_to_inv)
@gof.local_optimizer([T.inv])
def local_inv_canon(node):
    """Canonicalization: rewrite ``inv(x)`` as ``pow(x, -1.0)`` so that all
    reciprocal-like expressions share a single canonical form."""
    if node.op != T.inv:
        return False
    base = node.inputs[0]
    return [T.pow(base, -1.0)]
register_canonicalize(local_inv_canon)
@gof.local_optimizer([T.pow])
def local_pow_canonicalize(node):
    """Canonicalization: simplify ``pow(x, 1)`` to ``x`` and ``pow(x, 0)``
    to ones, wrapped in ``fill`` so the broadcasted output shape of the
    original ``pow`` is preserved.

    Returns a one-element replacement list on success, ``False`` otherwise.
    """
    if node.op != T.pow:
        return False
    # Hoisted: the original computed get_constant twice for the same input.
    # get_constant may return None; N.all(None == c) is falsy, so both
    # branches below are simply skipped in that case.
    exponent = local_mul_canonizer.get_constant(node.inputs[1])
    if N.all(exponent == 1.0):
        # fill keeps the broadcasted size of the original pow output
        return [T.fill(node.inputs[1], node.inputs[0])]
    if N.all(exponent == 0.0):
        #extra fills here are to make sure the size of the output stays constant.
        return [T.fill(node.inputs[0], T.fill(node.inputs[1], 1.0))]
    # Defect fixed: the original fell through here and implicitly returned
    # None when node.op == T.pow but neither constant matched; be explicit
    # and consistent with the other local optimizers.
    return False
register_canonicalize(local_pow_canonicalize)
@gof.local_optimizer([T.pow])
def local_pow_specialize(node):
    """Specialization: replace ``pow(x, c)`` for special constant exponents
    c in {2, 1, 0, 0.5, -0.5, -1, -2} with cheaper equivalents (sqr, sqrt,
    inv, fill, ...).

    We are past canonicalization here, so no un-necessary fills are
    introduced; the rewrite is only applied when dropping the exponent
    cannot change the output's broadcast pattern.
    """
    if node.op != T.pow:
        return False
    xsym = node.inputs[0]
    ysym = node.inputs[1]
    y = local_mul_canonizer.get_constant(ysym)
    # Only rewrite when x's pattern encompasses y's, so removing ysym from
    # the graph cannot shrink the broadcasted output.
    if y is None or not encompasses_broadcastable(xsym.type.broadcastable,
                                                  ysym.type.broadcastable):
        return False
    if N.all(y == 2.0):
        return [T.sqr(xsym)]
    if N.all(y == 1.0):
        return [xsym]
    if N.all(y == 0.0):
        return [T.fill(xsym, 1.0)]
    if N.all(y == 0.5):
        return [T.sqrt(xsym)]
    if N.all(y == -0.5):
        return [T.inv(T.sqrt(xsym))]
    if N.all(y == -1.0):
        return [T.inv(xsym)]
    if N.all(y == -2.0):
        return [T.inv(T.sqr(xsym))]
    # Defect fixed: the original implicitly returned None when the exponent
    # matched none of the special cases; be explicit.
    return False
register_specialize(local_pow_specialize)
@gof.local_optimizer([T.mul])
def local_mul_specialize(node):
    """Specialization: strip constant factors of 1.0 and -1.0 from a ``mul``
    (folding the -1's into a single leading negation) and short-circuit a
    constant 0.0 factor to the zero input itself.

    We are past canonicalization here, so no un-necessary fills are
    introduced. Returns a one-element replacement list, or ``False``.
    """
    if node.op != T.mul:
        return False
    neg = False
    new_inputs = []
    for input in node.inputs:
        # get_constant may return None for non-constant inputs;
        # N.all(None == c) is falsy, so those fall to the else branch.
        y = local_mul_canonizer.get_constant(input)
        if N.all(y == 1.0):
            continue            # multiplying by 1 is a no-op: drop it
        elif N.all(y == -1.0):
            neg ^= True         # fold the sign out; track its parity
        elif N.all(y == 0.0):
            # anything times 0 is 0: return the zero input itself
            # NOTE(review): this keeps input's broadcast pattern, not the
            # output's — presumably safe at this stage; verify.
            return [input]
        else:
            new_inputs.append(input)
    if len(new_inputs) == len(node.inputs):
        return False            # nothing was removed
    if len(new_inputs) == 0:
        # Every input was a constant +/-1, so the product is +/-1 with the
        # accumulated sign parity.
        # BUG FIX: the original used the *last* constant seen
        # (-y.flatten()[0] if neg else y.flatten()[0]), which yields the
        # wrong sign when that constant is -1 (e.g. mul(-1.0) became +1.0).
        newval = -1.0 if neg else 1.0
        return [T.TensorConstant(T.Tensor(dtype=node.outputs[0].type.dtype,
            broadcastable=[True] * node.outputs[0].ndim), N.asarray(newval))]
    if len(new_inputs) == 1:
        return [-new_inputs[0]] if neg else new_inputs
    return [-T.mul(*new_inputs)] if neg else [T.mul(*new_inputs)]
register_specialize(local_mul_specialize)
# Disabled draft of a dedicated in-place transpose Op; superseded by the
# plan to generate C code for InplaceDimShuffle in general.
if 0: #TODO: replace this with a c version of any InplaceDimShuffle
    class _TransposeInplace(T.Op):
        # output 0 is a view of input 0 (transpose without a copy)
        view_map = {0: [0]}

        def make_node(self, input):
            # NOTE(review): reversed() returns an iterator, not a tuple —
            # presumably T.tensor accepts it; verify if this dead code is
            # ever revived.
            return T.Apply(self, [input],
                [T.tensor(dtype = input.type.dtype,
                broadcastable = reversed(input.type.broadcastable))])

        def perform(self, node, (x, ), (z, )):
            z[0] = x.T

        def c_code(self, node, name, (x, ), (z, ), sub):
            # C implementation: PyArray_Transpose returns a view; the old
            # output (if any) is released before being replaced.
            return """
            PyArrayObject* transposed = (PyArrayObject*)PyArray_Transpose(%(x)s, NULL);
            if (%(z)s) {
            Py_XDECREF(%(z)s);
            }
            %(z)s = transposed;
            """ % locals()

        def __str__(self):
            return "_TransposeInplace"

    _transpose_inplace = _TransposeInplace()

    @gof.local_optimizer([T.DimShuffle([False,False],[1,0],inplace=True)])
    def local_dimshuffle_transposeinplace(node):
        # replace a 2-d inplace (1,0)-DimShuffle with the dedicated Op
        if node.op == T.DimShuffle([False,False],[1,0],inplace=True):
            return [_transpose_inplace(node.inputs[0])]
        return False
    register_specialize(local_dimshuffle_transposeinplace)

# Register the Canonizer instance built above for mul/div canonicalization.
register_canonicalize(local_mul_canonizer, name = 'local_mul_canonizer')
......@@ -724,8 +838,10 @@ def constant_folding(node):
register_canonicalize(constant_folding)

# Canonicalize dot(x, y).T into dot(y.T, x.T): the transpose is pushed onto
# the operands instead of the product, exposing more gemm opportunities.
inplace_matrix_transpose = T.DimShuffle([False,False], [1,0], inplace=True)
local_transposed_dot = gof.PatternSub((inplace_matrix_transpose, (T.dot, 'x', 'y')),
        (T.dot, (inplace_matrix_transpose, 'y'), (inplace_matrix_transpose, 'x')))
register_canonicalize(local_transposed_dot, name='local_transposed_dot')
# def _math_optimizer():
......
......@@ -662,56 +662,6 @@ class T_max_and_argmax(unittest.TestCase):
self.failUnless(i.shape == (2,3))
class T_transpose(unittest.TestCase):
    """Checks that `transpose` lowers to `inplace.transpose_inplace`, that
    the compiled result has the transposed shape, and that (when going
    through `function`) the output does not alias the input buffer."""

    def _check_no_alias(self, arr, expected_shape, probe):
        """Compile transpose(arr), run it, then verify the output shape and
        that mutating the output leaves the input element at `probe`
        untouched (i.e. no aliasing through `function`)."""
        n = as_tensor(arr)
        t = transpose(n)
        self.failUnless(t.owner.op == inplace.transpose_inplace)
        f = function([n], t)
        out = f(n.data)
        self.failUnless(out.shape == expected_shape)
        # would show through in n.data if the buffers were aliased
        out += 55.0
        self.failUnless(n.data[probe] == 1.0)

    def test0(self):
        self._check_no_alias(numpy.ones(()), (), ())

    def test1(self):
        self._check_no_alias(numpy.ones(5), (5,), (0,))

    def test2(self):
        self._check_no_alias(numpy.ones((5, 3)), (3, 5), (0, 0))

    def test3(self):
        """Test transpose of tensor, inplace version"""
        n = as_tensor(numpy.ones((5, 3, 2)))
        t = inplace.transpose_inplace(n)
        self.failUnless(t.owner.op == inplace.transpose_inplace)
        f = function([n], t)
        out = f(n.data)
        self.failUnless(out.shape == (2, 3, 5))
        # the explicit inplace op IS allowed to alias, so the update shows up
        out += 55.0
        self.failUnless(n.data[0, 0, 0] == 56.0)

    def test_grad(self):
        verify_grad(self, inplace.transpose_inplace, [numpy.random.rand(2, 3)])
        verify_grad(self, inplace.transpose_inplace, [numpy.ones(3)])
class T_subtensor(unittest.TestCase):
def setUp(self):
Subtensor.debug = False
......@@ -1406,179 +1356,6 @@ class t_dot(unittest.TestCase):
#verify_grad(self, dot, [self.rand(), self.rand(2)])
#verify_grad(self, dot, [self.rand(), self.rand(2,5)])
class t_gemm(unittest.TestCase):
    """Compare the `gemm` Op (z <- b*z + a*dot(x, y)) against a numpy
    reference across linkers, coefficient values, argument ranks and
    transposed/strided inputs, and check its destroy map (only z may be
    overwritten)."""
    def setUp(self):
        # fixed seed so the random test matrices are reproducible
        numpy.random.seed(44)
        _approx_eq.debug = 0
        Gemm.debug = False

    @staticmethod
    def _gemm(z,a,x,y,b):
        # reference implementation: z <- b*z + a*dot(x, y), scalar a and b
        assert a.shape == ()
        assert b.shape == ()
        return b * z + a * numpy.dot(x,y)

    @staticmethod
    def rand(*args):
        return numpy.random.rand(*args)

    def cmp(self, z, a, x, y, b):
        # Run gemm through each linker and compare with the reference.
        def cmp_linker(z, a, x, y, b, l):
            z,a,x,y,b = [numpy.asarray(p) for p in z,a,x,y,b]
            z_orig = z.copy()
            tz,ta,tx,ty,tb = [as_tensor(p).type() for p in z,a,x,y,b]
            f = function([tz,ta,tx,ty,tb], gemm(tz,ta,tx,ty,tb), mode=compile.Mode(optimizer = None, linker = l))
            new_z = f(z,a,x,y,b)
            z_after = self._gemm(z_orig, a, x, y, b)
            # gemm is destructive: the returned array must be z itself
            self.failUnless(z is new_z)
            #print z_orig, z_after, z, type(z_orig), type(z_after), type(z)
            #_approx_eq.debug = 1
            self.failUnless(_approx_eq(z_after, z))
            if a == 0.0 and b == 1.0:
                # z <- 1*z + 0*dot(x,y) leaves z unchanged, so the in-place
                # mutation check below would spuriously fail: skip it
                return
            else:
                self.failIf(numpy.all(z_orig == z))
        cmp_linker(copy(z), a, x, y, b, 'c|py')
        cmp_linker(copy(z), a, x, y, b, 'c')
        cmp_linker(copy(z), a, x, y, b, 'py')

    def test0a(self):
        # rank-1 arguments must be rejected with E_rank
        Gemm.debug = True
        try:
            g = gemm([1.], 1., [1.], [1.], 1.)
        except ValueError, e:
            if e[0] is Gemm.E_rank:
                return
        self.fail()

    def test0(self):
        # rank-0 (scalar) arguments must be rejected with E_rank
        try:
            self.cmp(1., 0., 1.0, 1.0, 1.0)
        except ValueError, e:
            if e[0] is Gemm.E_rank:
                return
        self.fail()

    def test2(self):
        # mixed-rank arguments must be rejected with E_rank
        try:
            self.cmp(2., 1.0, [3,2,1.], [[1],[2],[3.]], 1.0)
        except ValueError, e:
            self.failUnless(e[0] == Gemm.E_rank)
            return
        self.fail()

    # tests 4-12: sweep (a, b) coefficient combinations on 3x5 * 5x4 products
    def test4(self):
        self.cmp(self.rand(3,4), 1.0, self.rand(3,5), self.rand(5,4), 0.0)
    def test5(self): self.cmp(self.rand(3,4), 1.0,
            self.rand(3,5), self.rand(5,4), 1.0)
    def test6(self): self.cmp(self.rand(3,4), 1.0,
            self.rand(3,5), self.rand(5,4), -1.0)
    def test7(self): self.cmp(self.rand(3,4), 0.0,
            self.rand(3,5), self.rand(5,4), 0.0)
    def test8(self): self.cmp(self.rand(3,4), 0.0,
            self.rand(3,5), self.rand(5,4), 0.6)
    def test9(self): self.cmp(self.rand(3,4), 0.0,
            self.rand(3,5), self.rand(5,4), -1.0)
    def test10(self):
        _approx_eq.debug = 1
        self.cmp(self.rand(3,4), -1.0, self.rand(3,5), self.rand(5,4), 0.0)
    def test11(self): self.cmp(self.rand(3,4), -1.0,
            self.rand(3,5), self.rand(5,4), 1.0)
    def test12(self): self.cmp(self.rand(3,4), -1.0,
            self.rand(3,5), self.rand(5,4), -1.0)

    def test_destroy_map0(self):
        """test that only first input can be overwritten"""
        Z = as_tensor(self.rand(2,2))
        try:
            gemm(Z, 1.0, Z, Z, 1.0)
        except ValueError, e:
            if e[0] == Gemm.E_z_uniq:
                return
        self.fail()

    def test_destroy_map1(self):
        """test that only first input can be overwritten"""
        Z = as_tensor(self.rand(2,2))
        A = as_tensor(self.rand(2,2))
        try:
            gemm(Z, 1.0, A, inplace.transpose_inplace(Z), 1.0)
        except ValueError, e:
            if e[0] == Gemm.E_z_uniq:
                return
        self.fail()

    def test_destroy_map2(self):
        """test that only first input can be overwritten"""
        Z = as_tensor(self.rand(2,2))
        A = as_tensor(self.rand(2,2))
        try:
            gemm(Z, 1.0, inplace.transpose_inplace(Z), A, 1.0)
        except ValueError, e:
            if e[0] == Gemm.E_z_uniq:
                return
        self.fail()

    def test_destroy_map3(self):
        """test that only first input can be overwritten"""
        Z = as_tensor(self.rand(2,2))
        A = as_tensor(self.rand(2,2))
        try:
            gemm(Z, 1.0, Z, A, 1.0)
        except ValueError, e:
            if e[0] == Gemm.E_z_uniq:
                return
        self.fail()

    def test_destroy_map4(self):
        """test that dot args can be aliased"""
        Z = value(self.rand(2,2))
        A = value(self.rand(2,2))
        eval_outputs([gemm(Z, 1.0, A, A, 1.0)])
        eval_outputs([gemm(Z, 1.0, A, A.T, 1.0)])

    def test_transposes(self):
        # three square matrices which are not contiguous
        A = self.rand(4,5)[:,:4]
        B = self.rand(4,5)[:,:4]
        C = self.rand(4,5)[:,:4]

        def t(z,x,y,a=1.0, b=0.0,l='c|py',dt='float64'):
            # run gemm on (possibly transposed/strided) views and compare
            # with the reference result computed up-front
            z,a,x,y,b = [numpy.asarray(p,dtype=dt) for p in z,a,x,y,b]
            z_orig = z.copy()
            z_after = self._gemm(z, a, x, y, b)
            tz,ta,tx,ty,tb = [value(p) for p in z,a,x,y,b]
            f = function([tz,ta,tx,ty,tb], gemm(tz,ta,tx,ty,tb), mode = compile.Mode(optimizer = None, linker=l))
            f(z, a, x, y, b)
            self.failUnless(_approx_eq(z_after, z), (z_orig, z_after, z, z_after - z))
            # the same product expressed through transposes must also work
            f(z.T, a, y.T, x.T, b)
            self.failUnless(_approx_eq(z_after, z))

        t(C,A,B)
        t(C.T, A, B)
        t(C, A.T, B, dt='float32')
        t(C, A, B.T)
        t(C.T, A.T, B)
        t(C, A.T, B.T, dt='float32')
        t(C.T, A, B.T)
        t(C.T, A.T, B.T, dt='float32')
        t(C, A[:,:2], B[:2, :])
        t(C.T, A[:,:2], B[:2, :], dt='float32')
        t(C, A[:2,:].T, B[:2, :])
        t(C.T, A[:2,:].T, B[:2, :], dt='float32')
        t(C, A[:2,:].T, B[:, :2].T)
        t(C.T, A[:2,:].T, B[:, :2].T)
        # a misaligned combination must be rejected
        try:
            t(C.T, A[:2,:], B[:, :2].T)
        except ValueError, e:
            if e[0].find('aligned') >= 0:
                return
        self.fail()
class T_tensorfromscalar(unittest.TestCase):
def test0(self):
s = scal.constant(56)
......
差异被折叠。
差异被折叠。
差异被折叠。
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论