提交 98213004 authored 作者: James Bergstra's avatar James Bergstra

Merge with a conflict in nvcc_compiler due to the rpath change.

差异被折叠。
"""
Test compilation modes
"""
import random
import unittest

import numpy
import numpy.random
from nose.plugins.skip import SkipTest

import theano
import theano.tensor as T
from theano.tests import unittest_tools as utt
class T_bunch_of_modes(unittest.TestCase):
    """Check that every registered compilation mode still works."""

    def test1(self):
        # Quick test after the LazyLinker branch merge to check that all
        # the currently registered modes can still compile and run a graph.
        linker_classes_involved = []
        for modename in theano.config.__class__.__dict__['mode'].all:
            x = T.matrix()
            y = T.vector()
            f = theano.function([x, y], x + y, mode=modename)
            # Test that the compiled function actually runs something.
            f([[1, 2], [3, 4]], [5, 6])
            linker_classes_involved.append(f.maker.mode.linker.__class__)
            # Parenthesized form prints the same text under Python 2 and 3.
            print('MODE: %s %s stop' % (modename, f.maker.mode.linker))
        # Regression check: the distinct linkers exercised should be
        # - VM_Linker
        # - OpWiseCLinker (FAST_RUN)
        # - WrapLinker (PROFILE_MODE)
        # - PerformLinker (FAST_COMPILE)
        # - DebugMode's Linker (DEBUG_MODE)
        assert 5 == len(set(linker_classes_involved))
if __name__ == '__main__':
    # Run this module's test cases when executed directly as a script.
    unittest.main()
...@@ -146,7 +146,7 @@ from link import \ ...@@ -146,7 +146,7 @@ from link import \
Container, Linker, LocalLinker, PerformLinker, WrapLinker, WrapLinkerMany Container, Linker, LocalLinker, PerformLinker, WrapLinker, WrapLinkerMany
from op import \ from op import \
Op Op, PureOp
from opt import (Optimizer, optimizer, SeqOptimizer, from opt import (Optimizer, optimizer, SeqOptimizer,
MergeOptimizer, MergeOptMerge, MergeOptimizer, MergeOptMerge,
......
...@@ -13,7 +13,7 @@ AddConfigVar('nvcc.compiler_bindir', ...@@ -13,7 +13,7 @@ AddConfigVar('nvcc.compiler_bindir',
"If defined, nvcc compiler driver will seek g++ and gcc in this directory", "If defined, nvcc compiler driver will seek g++ and gcc in this directory",
StrParam("")) StrParam(""))
AddConfigVar('cuda.nvccflags', AddConfigVar('nvcc.flags',
"Extra compiler flags for nvcc", "Extra compiler flags for nvcc",
StrParam("")) StrParam(""))
...@@ -183,11 +183,9 @@ def nvcc_module_compile_str( ...@@ -183,11 +183,9 @@ def nvcc_module_compile_str(
if sys.platform != 'darwin': if sys.platform != 'darwin':
# the 64bit CUDA libs are in the same files as are named by the function above # the 64bit CUDA libs are in the same files as are named by the function above
rpaths.append(os.path.join(config.cuda.root,'lib64')) rpaths.append(os.path.join(config.cuda.root,'lib64'))
for rpath in rpaths: for rpath in rpaths:
cmd.extend(['-Xlinker',','.join(['-rpath',rpath])]) cmd.extend(['-Xlinker',','.join(['-rpath',rpath])])
nvccflags = [flag for flag in config.cuda.nvccflags.split(' ') if flag] cmd.extend([flag for flag in config.nvcc.flags.split(' ') if flag])
cmd.extend(nvccflags)
cmd.extend('-I%s'%idir for idir in include_dirs) cmd.extend('-I%s'%idir for idir in include_dirs)
cmd.extend(['-o',lib_filename]) cmd.extend(['-o',lib_filename])
cmd.append(os.path.split(cppfilename)[-1]) cmd.append(os.path.split(cppfilename)[-1])
......
...@@ -133,6 +133,79 @@ def sp_ones_like(x): ...@@ -133,6 +133,79 @@ def sp_ones_like(x):
data, indices, indptr, shape = csm_properties(x) #TODO: don't restrict to CSM formats data, indices, indptr, shape = csm_properties(x) #TODO: don't restrict to CSM formats
return CSM(format=x.format)(tensor.ones_like(data), indices, indptr, shape) return CSM(format=x.format)(tensor.ones_like(data), indices, indptr, shape)
class _sparse_py_operators:
    """Mixin giving sparse variables the usual Python operator syntax.

    Every operator simply delegates to the matching module-level sparse
    op (``add``, ``sub``, ``mul``, ``neg``, ``transpose``,
    ``structured_dot``).
    """

    @property
    def T(self):
        """Return aliased transpose of self (read-only)"""
        return transpose(self)

    def __neg__(self):
        return neg(self)

    def __add__(self, other):
        return add(self, other)

    def __radd__(self, other):
        return add(other, self)

    def __sub__(self, other):
        return sub(self, other)

    def __rsub__(self, other):
        return sub(other, self)

    def __mul__(self, other):
        return mul(self, other)

    def __rmul__(self, other):
        # NOTE: the original intentionally (or not) does NOT swap the
        # operands here, so we call mul(self, other) to match it exactly.
        return mul(self, other)

    # extra pseudo-operator symbols
    def __dot__(self, other):
        return structured_dot(self, other)

    def __rdot__(self, other):
        return structured_dot(other, self)

    # N.B. THIS IS COMMENTED OUT ON PURPOSE!!!
    # Discussion with Fred & James (at least, and maybe others before):
    # we decided that casting from a sparse to dense should be explicit,
    # because it's usually something you want to be pretty careful about,
    # and not to do by accident.
    #def _as_TensorVariable(self):
    #    return dense_from_sparse(self)

    @property
    def shape(self):
        # Don't worry! The plan is that the ShapeFeature in tensor.opt
        # will do shape propagation and remove the dense_from_sparse from
        # the graph. This will *NOT* actually expand your sparse matrix
        # just to get the shape.
        return tensor.shape(dense_from_sparse(self))

    @property
    def ndim(self):
        return self.type.ndim

    @property
    def dtype(self):
        return self.type.dtype
class SparseVariable(gof.Variable, _sparse_py_operators):
    """Symbolic sparse variable; printed as ``Class{format,dtype}``."""

    @property
    def dtype(self):
        return self.type.dtype

    @property
    def format(self):
        return self.type.format

    def __str__(self):
        return '%s{%s,%s}' % (self.__class__.__name__,
                              self.format,
                              self.dtype)

    def __repr__(self):
        return self.__str__()
class SparseConstantSignature(tuple):
    """Hashable signature ``(type, data)`` identifying a sparse constant.

    Two signatures compare equal when the types compare equal and the
    data matrices share dtype, class and shape and hold (approximately)
    the same values.  The value tolerance scales with ``nnz``.
    """

    def __eq__(self, other):
        (a, b), (x, y) = self, other
        if not (a == x
                and (b.dtype == y.dtype)
                and (type(b) == type(y))
                and (b.shape == y.shape)):
            return False
        # Use <= (not <) so that two identical matrices with nnz == 0
        # (all-zero sparse matrices) still compare equal: with a strict
        # '<', the tolerance 1e-6 * 0 == 0 rejected a difference of 0.
        return abs(b - y).sum() <= 1e-6 * b.nnz

    def __hash__(self):
        # The matrix content is deliberately not hashed (only its class),
        # so approximately-equal signatures still hash identically.
        (a, b) = self
        return hash(type(self)) ^ hash(a) ^ hash(type(b))
class SparseConstant(gof.Constant, _sparse_py_operators):
    """Sparse constant whose data is available at graph-construction time."""

    @property
    def dtype(self):
        return self.type.dtype

    @property
    def format(self):
        return self.type.format

    def signature(self):
        """Return a hashable/equatable signature of ``(type, data)``."""
        # A constant must hold data for merging/comparison to make sense.
        assert self.data is not None
        return SparseConstantSignature((self.type, self.data))

    def __str__(self):
        return '%s{%s,%s,shape=%s,nnz=%s}' % (self.__class__.__name__,
                                              self.format,
                                              self.dtype,
                                              self.data.shape,
                                              self.data.nnz)

    def __repr__(self):
        return self.__str__()
class SparseValue(gof.Value, _sparse_py_operators):
    """Sparse runtime value; dtype and format mirror those of its type."""

    @property
    def dtype(self):
        return self.type.dtype

    @property
    def format(self):
        return self.type.format
class SparseType(gof.Type): class SparseType(gof.Type):
""" """
...@@ -149,6 +222,9 @@ class SparseType(gof.Type): ...@@ -149,6 +222,9 @@ class SparseType(gof.Type):
dtype_set = set(['int', 'int8', 'int16','int32', 'int64', 'float32', 'float64', 'complex64','complex128']) dtype_set = set(['int', 'int8', 'int16','int32', 'int64', 'float32', 'float64', 'complex64','complex128'])
ndim = 2 ndim = 2
Variable = SparseVariable
Constant = SparseConstant
def __init__(self, format, dtype): def __init__(self, format, dtype):
""" """
Fundamental way to create a sparse node. Fundamental way to create a sparse node.
...@@ -248,65 +324,6 @@ csr_dmatrix = SparseType(format='csr', dtype='float64') ...@@ -248,65 +324,6 @@ csr_dmatrix = SparseType(format='csr', dtype='float64')
csc_fmatrix = SparseType(format='csc', dtype='float32') csc_fmatrix = SparseType(format='csc', dtype='float32')
csr_fmatrix = SparseType(format='csr', dtype='float32') csr_fmatrix = SparseType(format='csr', dtype='float32')
class _sparse_py_operators:
    """Mixin giving sparse variables the usual Python operator syntax.

    Each operator delegates to the matching module-level sparse op
    (add, sub, mul, neg, transpose, structured_dot).
    """
    # Read-only aliased transpose.
    T = property(lambda self: transpose(self), doc = "Return aliased transpose of self (read-only)")
    def __neg__(self): return neg(self)
    def __add__(left, right): return add(left, right)
    def __radd__(right, left): return add(left, right)
    def __sub__(left, right): return sub(left, right)
    def __rsub__(right, left): return sub(left, right)
    def __mul__(left, right): return mul(left, right)
    # NOTE(review): unlike __radd__/__rsub__, this does not swap the
    # operands; harmless because mul is used commutatively here.
    def __rmul__(left, right): return mul(left, right)

    #extra pseudo-operator symbols
    def __dot__(left, right): return structured_dot(left, right)
    def __rdot__(right, left): return structured_dot(left, right)

    #N.B. THIS IS COMMENTED OUT ON PURPOSE!!!
    # Discussion with Fred & James (at least, and maybe others before):
    # we decided that casting from a sparse to dense should be explicit,
    # because it's usually something you want to be pretty careful about,
    # and not to do by accident.
    #def _as_TensorVariable(self):
    #    return dense_from_sparse(self)

    # don't worry! The plan is that the ShapeFeature in tensor.opt will do
    # shape propagation and remove the dense_from_sparse from the graph.
    # This will *NOT* actually expand your sparse matrix just to get the shape.
    shape = property(lambda self: tensor.shape(dense_from_sparse(self)))
    ndim = property(lambda self: self.type.ndim)
    dtype = property(lambda self: self.type.dtype)
class SparseVariable(gof.Variable, _sparse_py_operators):
    """Symbolic sparse variable; dtype and format mirror those of its type."""
    dtype = property(lambda self: self.type.dtype)
    format = property(lambda self: self.type.format)
class SparseConstantSignature(tuple):
    """Hashable signature ``(type, data)`` identifying a sparse constant.

    Equality requires matching type, data dtype, data class, shape and
    approximately equal values (tolerance scales with nnz).
    """
    def __eq__(self, other):
        (a, b), (x,y) = self, other
        # NOTE(review): when b.nnz == 0 the tolerance is 0 and the strict
        # '<' makes two identical all-zero matrices compare unequal.
        return a == x \
            and (b.dtype == y.dtype)\
            and (type(b) == type(y))\
            and (b.shape == y.shape)\
            and (abs(b-y).sum() < 1e-6 * b.nnz)
    def __hash__(self):
        # The matrix content is deliberately not hashed (only its class),
        # so approximately-equal signatures still hash identically.
        (a,b) = self
        return hash(type(self)) ^ hash(a) ^ hash(type(b))
class SparseConstant(gof.Constant, _sparse_py_operators):
    """Sparse constant whose data is available at graph-construction time."""
    dtype = property(lambda self: self.type.dtype)
    format = property(lambda self: self.type.format)
    def signature(self):
        # Return a hashable/equatable signature of (type, data); a constant
        # must hold data for merging/comparison to make sense.
        assert self.data is not None
        return SparseConstantSignature((self.type, self.data))
class SparseValue(gof.Value, _sparse_py_operators):
    """Sparse runtime value; dtype and format mirror those of its type."""
    dtype = property(lambda self: self.type.dtype)
    format = property(lambda self: self.type.format)
# CONSTRUCTION # CONSTRUCTION
class CSMProperties(gof.Op): class CSMProperties(gof.Op):
"""Extract all of .data .indices and .indptr""" """Extract all of .data .indices and .indptr"""
......
...@@ -937,6 +937,9 @@ def _gemm_from_node2(node): ...@@ -937,6 +937,9 @@ def _gemm_from_node2(node):
lst = _factor_canonicalized(lst) lst = _factor_canonicalized(lst)
rval = _gemm_from_factored_list(lst) rval = _gemm_from_factored_list(lst)
#print "RVAL", rval #print "RVAL", rval
# THIS GOT COMMENTED OUT AT SOME POINT - ASK P.Lamblin maybe why?
#if rval:
# assert rval[0].type == node.outputs[0].type, (rval[0].type, node.outputs[0].type)
if rval and (rval[0].type == node.outputs[0].type): if rval and (rval[0].type == node.outputs[0].type):
return rval return rval
......
...@@ -3057,30 +3057,33 @@ def constant_folding(node): ...@@ -3057,30 +3057,33 @@ def constant_folding(node):
for input in node.inputs: for input in node.inputs:
if not isinstance(input, Constant): if not isinstance(input, Constant):
return False return False
try: #condition: all inputs are constant
storage = [[None] for output in node.outputs]
node.op.perform(node, [x.data for x in node.inputs], storage) storage_map=dict([(i,[i.data]) for i in node.inputs])
except MethodNotDefined: compute_map=dict([(i,[True]) for i in node.inputs])
tmp_inputs = [x.type() for x in node.inputs] for o in node.outputs:
f = compile.function( storage_map[o] = [None]
inputs=tmp_inputs, compute_map[o] = [False]
outputs=node.op.make_node(*tmp_inputs).outputs,
mode=compile.Mode(linker='c|py',optimizer=None)) thunk = node.op.make_thunk(node, storage_map, compute_map,
xvals = f(*[x.data for x in node.inputs]) no_recycling=[])
storage = [[xv] for xv in xvals]
required = thunk()
msg = [] assert not required # a node whose inputs are all provided should always
assert len(storage) == len(node.outputs) # return successfully
for s, output in zip(storage, node.outputs):
rval = []
for output in node.outputs:
assert compute_map[output][0], (output, storage_map[output][0])
try: try:
constant = output.type.Constant constant = output.type.Constant
except: except AttributeError:
constant = Constant constant = Constant
msg += [constant(output.type, s[0])] rval.append(constant(output.type, storage_map[output][0]))
return msg return rval
register_canonicalize(constant_folding, 'fast_compile') register_canonicalize(constant_folding, 'fast_compile')
register_stabilize(constant_folding) # because register_stabilize(constant_folding)
register_specialize(constant_folding) register_specialize(constant_folding)
def _is_1(expr): def _is_1(expr):
......
...@@ -49,11 +49,14 @@ class T_random_function(unittest.TestCase): ...@@ -49,11 +49,14 @@ class T_random_function(unittest.TestCase):
rng_R = random_state_type() rng_R = random_state_type()
# use make_node to override some of the self.args # use make_node to override some of the self.args
post_r2, out2 = rf2(rng_R, (4,), -2, 2) post_r2, out2 = rf2(rng_R, (4,), -2, 2) # NOT INPLACE
post_r2_4, out2_4 = rf2(rng_R, (4,), -4.0, 2) post_r4, out4 = rf4(rng_R, (4,), -4, 4) # INPLACE
post_r2_4_4, out2_4_4 = rf2(rng_R, (4,), -4.0, 4.0) post_r2_4, out2_4 = rf2(rng_R, (4,), -4.0, 2) # NOT INPLACE
post_r4, out4 = rf4(rng_R, (4,), -4, 4) post_r2_4_4, out2_4_4 = rf2(rng_R, (4,), -4.0, 4.0) # NOT INPLACE
# configure out4 to be computed inplace
# The update expression means that the random state rng_R will
# be maintained by post_r4
f = compile.function( f = compile.function(
[compile.In(rng_R, [compile.In(rng_R,
value=numpy.random.RandomState(utt.fetch_seed()), value=numpy.random.RandomState(utt.fetch_seed()),
...@@ -65,9 +68,25 @@ class T_random_function(unittest.TestCase): ...@@ -65,9 +68,25 @@ class T_random_function(unittest.TestCase):
f2, f4, f2_4, f2_4_4 = f() f2, f4, f2_4, f2_4_4 = f()
f2b, f4b, f2_4b, f2_4_4b = f() f2b, f4b, f2_4b, f2_4_4b = f()
assert numpy.allclose(f2*2, f4) print f2
assert numpy.allclose(f2_4_4, f4) print f4
assert not numpy.allclose(f4, f4b) print f2_4
print f2_4_4
#print f2b
#print f4b
#print f2_4b
#print f2_4_4b
# setting bounds is same as multiplying by 2
assert numpy.allclose(f2*2, f4), (f2, f4)
# retrieving from non-inplace generator
# is same as inplace one for first call
assert numpy.allclose(f2_4_4, f4), (f2_4_4, f4)
# f4 changes from call to call, that the update has worked
assert not numpy.allclose(f4, f4b), (f4, f4b)
def test_inplace_optimization(self): def test_inplace_optimization(self):
"""Test that FAST_RUN includes the random_make_inplace optimization""" """Test that FAST_RUN includes the random_make_inplace optimization"""
......
...@@ -13,19 +13,32 @@ from theano.tests import unittest_tools as utt ...@@ -13,19 +13,32 @@ from theano.tests import unittest_tools as utt
should ensure that it will remain operational should ensure that it will remain operational
''' '''
class T_diverse(unittest.TestCase): class T_scipy(unittest.TestCase):
def setUp(self): def setUp(self):
utt.seed_rng() utt.seed_rng()
self.orig_floatX = theano.config.floatX
def tearDown(self):
theano.config.floatX = self.orig_floatX
def scipy_paper_example1(self): def test_scipy_paper_example1(self):
a = theano.tensor.vector('a') # declare variable a = theano.tensor.vector('a') # declare variable
b = a + a**10 # build expression b = a + a**10 # build expression
f = theano.function([a], b) # compile function f = theano.function([a], b) # compile function
assert numpy.all(f([0,1,2]) == numpy.array([0,2,1026])) assert numpy.all(f([0,1,2]) == numpy.array([0,2,1026]))
def scipy_papaer_example2(self): def test_scipy_paper_example2(self):
''' This just sees if things compile well and if they run ''' ''' This just sees if things compile well and if they run '''
# PREAMPBLE
T = theano.tensor
shared = theano.shared
function = theano.function
rng = numpy.random
theano.config.floatX='float64'
#
# ACTUAL SCRIPT FROM PAPER
x = T.matrix() x = T.matrix()
y = T.vector() y = T.vector()
w = shared(rng.randn(100)) w = shared(rng.randn(100))
...@@ -52,6 +65,7 @@ class T_diverse(unittest.TestCase): ...@@ -52,6 +65,7 @@ class T_diverse(unittest.TestCase):
for i in range(training_steps): for i in range(training_steps):
pred, err = train(D[0], D[1]) pred, err = train(D[0], D[1])
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论