提交 3ca77162 作者: Pascal Lamblin

Merge pull request #1522 from nouiz/small_stuff

Small stuff
...@@ -54,9 +54,10 @@ if [ "$RELEASE" ]; then ...@@ -54,9 +54,10 @@ if [ "$RELEASE" ]; then
echo echo
fi fi
echo "Executing tests with mode=FAST_COMPILE with --batch=1000" # with --batch=1000" # The buildbot freeze sometimes when collecting the tests to run
echo "THEANO_FLAGS=${FLAGS},mode=FAST_COMPILE ${NOSETESTS} --batch=1000 ${ARGS}" echo "Executing tests with mode=FAST_COMPILE"
THEANO_FLAGS=${FLAGS},mode=FAST_COMPILE ${NOSETESTS} --batch=1000 ${ARGS} echo "THEANO_FLAGS=${FLAGS},mode=FAST_COMPILE ${NOSETESTS} ${ARGS}"
THEANO_FLAGS=${FLAGS},mode=FAST_COMPILE ${NOSETESTS} ${ARGS}
echo "Number of elements in the compiledir:" echo "Number of elements in the compiledir:"
ls ${COMPILEDIR}|wc -l ls ${COMPILEDIR}|wc -l
echo echo
......
...@@ -12,7 +12,7 @@ import tokenize ...@@ -12,7 +12,7 @@ import tokenize
import argparse import argparse
import reindent import reindent
from theano.compat.six.StringIO import StringIO from theano.compat.six import StringIO
SKIP_WHITESPACE_CHECK_FILENAME = ".hg/skip_whitespace_check" SKIP_WHITESPACE_CHECK_FILENAME = ".hg/skip_whitespace_check"
......
...@@ -12,22 +12,28 @@ if cuda_available: ...@@ -12,22 +12,28 @@ if cuda_available:
from theano.sandbox.cuda.basic_ops import host_from_gpu, gpu_from_host from theano.sandbox.cuda.basic_ops import host_from_gpu, gpu_from_host
from theano.sandbox.cuda.opt import register_opt from theano.sandbox.cuda.opt import register_opt
class MultinomialFromUniform(Op): class MultinomialFromUniform(Op):
'''Converts samples from a uniform into sample from a multinomial.''' '''Converts samples from a uniform into sample from a multinomial.'''
def __init__(self, odtype): def __init__(self, odtype):
self.odtype=odtype self.odtype = odtype
def __eq__(self, other): def __eq__(self, other):
return type(self) == type(other) and self.odtype==other.odtype return type(self) == type(other) and self.odtype == other.odtype
def __hash__(self): def __hash__(self):
return hash((type(self), self.odtype)) return hash((type(self), self.odtype))
def __str__(self): def __str__(self):
return '%s{%s}'%(self.__class__.__name__, self.odtype) return '%s{%s}' % (self.__class__.__name__, self.odtype)
def __setstate__(self, dct): def __setstate__(self, dct):
self.__dict__.update(dct) self.__dict__.update(dct)
try: try:
self.odtype self.odtype
except AttributeError: except AttributeError:
self.odtype='auto' self.odtype = 'auto'
def make_node(self, pvals, unis): def make_node(self, pvals, unis):
pvals = T.as_tensor_variable(pvals) pvals = T.as_tensor_variable(pvals)
unis = T.as_tensor_variable(unis) unis = T.as_tensor_variable(unis)
...@@ -35,11 +41,12 @@ class MultinomialFromUniform(Op): ...@@ -35,11 +41,12 @@ class MultinomialFromUniform(Op):
raise NotImplementedError('pvals ndim should be 2', pvals.ndim) raise NotImplementedError('pvals ndim should be 2', pvals.ndim)
if unis.ndim != 1: if unis.ndim != 1:
raise NotImplementedError('unis ndim should be 1', unis.ndim) raise NotImplementedError('unis ndim should be 1', unis.ndim)
if self.odtype=='auto': if self.odtype == 'auto':
odtype = pvals.dtype odtype = pvals.dtype
else: else:
odtype = self.odtype odtype = self.odtype
return Apply(self, [pvals, unis], [T.matrix(dtype=odtype)]) out = T.tensor(dtype=odtype, broadcastable=pvals.type.broadcastable)
return Apply(self, [pvals, unis], [out])
def grad(self, ins, outgrads): def grad(self, ins, outgrads):
pvals, unis = ins pvals, unis = ins
...@@ -121,6 +128,7 @@ class MultinomialFromUniform(Op): ...@@ -121,6 +128,7 @@ class MultinomialFromUniform(Op):
} }
} // END NESTED SCOPE } // END NESTED SCOPE
""" % locals() """ % locals()
def perform(self, node, ins, outs): def perform(self, node, ins, outs):
(pvals, unis) = ins (pvals, unis) = ins
(z,) = outs (z,) = outs
...@@ -165,7 +173,7 @@ class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp): ...@@ -165,7 +173,7 @@ class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp):
raise TypeError('pvals must be cudandarray', pvals) raise TypeError('pvals must be cudandarray', pvals)
if not isinstance(unis.type, CudaNdarrayType): if not isinstance(unis.type, CudaNdarrayType):
raise TypeError('unis must be cudandarray', unis) raise TypeError('unis must be cudandarray', unis)
if self.odtype=='auto': if self.odtype == 'auto':
odtype = pvals.dtype odtype = pvals.dtype
else: else:
odtype = self.odtype odtype = self.odtype
...@@ -173,7 +181,9 @@ class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp): ...@@ -173,7 +181,9 @@ class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp):
raise NotImplementedError( raise NotImplementedError(
'GpuMultinomialFromUniform works only if ' 'GpuMultinomialFromUniform works only if '
'self.odtype == pvals.dtype', odtype, pvals.dtype) 'self.odtype == pvals.dtype', odtype, pvals.dtype)
return Apply(self, [pvals, unis], [pvals.type()]) br = (pvals.broadcastable[1], pvals.broadcastable[0])
out = CudaNdarrayType(broadcastable=br)()
return Apply(self, [pvals, unis], [out])
def perform(self, node, ins, outs): def perform(self, node, ins, outs):
#The perform from parent don't work with CudaNdarray. We #The perform from parent don't work with CudaNdarray. We
...@@ -226,7 +236,6 @@ class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp): ...@@ -226,7 +236,6 @@ class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp):
""" % locals() """ % locals()
def c_code(self, node, name, ins, outs, sub): def c_code(self, node, name, ins, outs, sub):
(pvals, unis) = ins (pvals, unis) = ins
(z,) = outs (z,) = outs
...@@ -327,25 +336,30 @@ class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp): ...@@ -327,25 +336,30 @@ class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp):
} // END NESTED SCOPE } // END NESTED SCOPE
""" % locals() """ % locals()
@local_optimizer() @local_optimizer()
def local_gpu_multinomial(node): def local_gpu_multinomial(node):
if type(node.op) is MultinomialFromUniform: if type(node.op) is MultinomialFromUniform:
p, u = node.inputs p, u = node.inputs
m, = node.outputs m, = node.outputs
if (p.dtype == u.dtype == m.dtype == 'float32' and if (p.dtype == u.dtype == m.dtype == 'float32' and
any([i.owner and isinstance(i.owner.op, theano.sandbox.cuda.HostFromGpu) any([i.owner and isinstance(i.owner.op,
theano.sandbox.cuda.HostFromGpu)
for i in node.inputs])): for i in node.inputs])):
gpu_op = GpuMultinomialFromUniform(node.op.odtype) gpu_op = GpuMultinomialFromUniform(node.op.odtype)
return [host_from_gpu(gpu_op(*[gpu_from_host(i) for i in node.inputs])).T] return [host_from_gpu(gpu_op(*[gpu_from_host(i)
for i in node.inputs])).T]
if (isinstance(node.op, theano.sandbox.cuda.GpuFromHost) and if (isinstance(node.op, theano.sandbox.cuda.GpuFromHost) and
node.inputs[0].owner and type(node.inputs[0].owner.op) is MultinomialFromUniform): node.inputs[0].owner and type(node.inputs[0].owner.op)
is MultinomialFromUniform):
multi = node.inputs[0].owner multi = node.inputs[0].owner
p, u = multi.inputs p, u = multi.inputs
m, = multi.outputs m, = multi.outputs
if (p.dtype == u.dtype == m.dtype == 'float32'): if (p.dtype == u.dtype == m.dtype == 'float32'):
gpu_op = GpuMultinomialFromUniform(multi.op.odtype) gpu_op = GpuMultinomialFromUniform(multi.op.odtype)
ret = gpu_op(*[gpu_from_host(i) for i in multi.inputs]).T ret = gpu_op(*[gpu_from_host(i) for i in multi.inputs]).T
# The dimshuffle is on the cpu, but will be moved to the gpu by an opt. # The dimshuffle is on the cpu, but will be moved to the
# gpu by an opt.
return [gpu_from_host(ret)] return [gpu_from_host(ret)]
if cuda_available: if cuda_available:
......
...@@ -9,15 +9,19 @@ from theano.compile.mode import get_default_mode, predefined_linkers ...@@ -9,15 +9,19 @@ from theano.compile.mode import get_default_mode, predefined_linkers
from theano.gof.python25 import any from theano.gof.python25 import any
import theano.sandbox.cuda as cuda import theano.sandbox.cuda as cuda
def get_mode(gpu): def get_mode(gpu):
mode = get_default_mode() mode = get_default_mode()
mode = copy.copy(mode) mode = copy.copy(mode)
if gpu: if gpu:
mode = mode.including('gpu', 'gpu_local_optimizations', 'local_cut_gpu_host_gpu', 'local_gpu_multinomial') mode = mode.including('gpu', 'gpu_local_optimizations',
'local_cut_gpu_host_gpu',
'local_gpu_multinomial')
if isinstance(mode.linker, theano.gof.PerformLinker): if isinstance(mode.linker, theano.gof.PerformLinker):
mode.linker = predefined_linkers['c|py'] mode.linker = predefined_linkers['c|py']
return mode return mode
def run_with_c(f, gpu=False): def run_with_c(f, gpu=False):
mode = get_mode(gpu) mode = get_mode(gpu)
f(mode, gpu) f(mode, gpu)
...@@ -30,52 +34,54 @@ def test_multinomial_0(): ...@@ -30,52 +34,54 @@ def test_multinomial_0():
p = tensor.fmatrix() p = tensor.fmatrix()
u = tensor.fvector() u = tensor.fvector()
m = multinomial.MultinomialFromUniform('auto')(p,u) m = multinomial.MultinomialFromUniform('auto')(p, u)
def body(mode, gpu): def body(mode, gpu):
#the m*2 allows the multinomial to reuse output #the m*2 allows the multinomial to reuse output
f = function([p,u], m*2, allow_input_downcast=True, mode=mode) f = function([p, u], m*2, allow_input_downcast=True, mode=mode)
if gpu: if gpu:
assert any([type(node.op) is multinomial.GpuMultinomialFromUniform for node in f.maker.fgraph.toposort()]) assert any([type(node.op) is multinomial.GpuMultinomialFromUniform
for node in f.maker.fgraph.toposort()])
# test that both first and second samples can be drawn # test that both first and second samples can be drawn
assert numpy.allclose(f([[1,0], [0,1]], [.1, .1]), assert numpy.allclose(f([[1, 0], [0, 1]], [.1, .1]),
[[2,0], [0,2]]) [[2, 0], [0, 2]])
# test that both second labels can be drawn # test that both second labels can be drawn
r = f([[.2,.8], [.3,.7]], [.31, .31]) r = f([[.2, .8], [.3, .7]], [.31, .31])
assert numpy.allclose(r, [[0,2], [0,2]]), r assert numpy.allclose(r, [[0, 2], [0, 2]]), r
# test that both first labels can be drawn # test that both first labels can be drawn
r = f([[.2,.8], [.3,.7]], [.21, .21]) r = f([[.2, .8], [.3, .7]], [.21, .21])
assert numpy.allclose(r, [[0,2], [2,0]]), r assert numpy.allclose(r, [[0, 2], [2, 0]]), r
#change the size to make sure output gets reallocated ok #change the size to make sure output gets reallocated ok
# and also make sure that the GPU version doesn't screw up the # and also make sure that the GPU version doesn't screw up the
# transposed-ness # transposed-ness
r = f([[.2,.8] ], [.25]) r = f([[.2, .8]], [.25])
assert numpy.allclose(r, [[0,2]]), r assert numpy.allclose(r, [[0, 2]]), r
run_with_c(body) run_with_c(body)
if cuda.cuda_available: if cuda.cuda_available:
run_with_c(body, True) run_with_c(body, True)
#TODO: check a bigger example (make sure blocking on GPU is handled correctly) #TODO: check a bigger example (make sure blocking on GPU is handled correctly)
def test_multinomial_large(): def test_multinomial_large():
# DEBUG_MODE will test this on GPU # DEBUG_MODE will test this on GPU
def body(mode, gpu): def body(mode, gpu):
p = tensor.fmatrix() p = tensor.fmatrix()
u = tensor.fvector() u = tensor.fvector()
m = multinomial.MultinomialFromUniform('auto')(p,u) m = multinomial.MultinomialFromUniform('auto')(p, u)
f = function([p,u], m*2, allow_input_downcast=True, mode=mode) f = function([p, u], m*2, allow_input_downcast=True, mode=mode)
if gpu: if gpu:
assert any([type(node.op) is multinomial.GpuMultinomialFromUniform for node in f.maker.fgraph.toposort()]) assert any([type(node.op) is multinomial.GpuMultinomialFromUniform
for node in f.maker.fgraph.toposort()])
pval = numpy.arange(10000 * 4, dtype='float32').reshape((10000, 4))+0.1 pval = numpy.arange(10000 * 4, dtype='float32').reshape((10000, 4))+0.1
pval = pval / pval.sum(axis=1)[:,None] pval = pval / pval.sum(axis=1)[:, None]
uval = numpy.ones_like(pval[:,0]) * 0.5 uval = numpy.ones_like(pval[:, 0]) * 0.5
mval = f(pval,uval) mval = f(pval, uval)
assert mval.shape == pval.shape assert mval.shape == pval.shape
if config.cast_policy == 'custom': if config.cast_policy == 'custom':
...@@ -88,7 +94,7 @@ def test_multinomial_large(): ...@@ -88,7 +94,7 @@ def test_multinomial_large():
raise NotImplementedError(config.cast_policy) raise NotImplementedError(config.cast_policy)
assert numpy.allclose(mval.sum(axis=1), 2) assert numpy.allclose(mval.sum(axis=1), 2)
asdf = numpy.asarray([0, 0, 2, 0])+0*pval asdf = numpy.asarray([0, 0, 2, 0])+0*pval
assert numpy.allclose(mval, asdf) #broadcast over all rows assert numpy.allclose(mval, asdf) # broadcast over all rows
run_with_c(body) run_with_c(body)
if cuda.cuda_available: if cuda.cuda_available:
run_with_c(body, True) run_with_c(body, True)
...@@ -97,36 +103,52 @@ def test_multinomial_large(): ...@@ -97,36 +103,52 @@ def test_multinomial_large():
def test_multinomial_dtypes(): def test_multinomial_dtypes():
p = tensor.dmatrix() p = tensor.dmatrix()
u = tensor.dvector() u = tensor.dvector()
m = multinomial.MultinomialFromUniform('auto')(p,u) m = multinomial.MultinomialFromUniform('auto')(p, u)
assert m.dtype == 'float64', m.dtype assert m.dtype == 'float64', m.dtype
p = tensor.fmatrix() p = tensor.fmatrix()
u = tensor.fvector() u = tensor.fvector()
m = multinomial.MultinomialFromUniform('auto')(p,u) m = multinomial.MultinomialFromUniform('auto')(p, u)
assert m.dtype == 'float32', m.dtype assert m.dtype == 'float32', m.dtype
p = tensor.fmatrix() p = tensor.fmatrix()
u = tensor.fvector() u = tensor.fvector()
m = multinomial.MultinomialFromUniform('float64')(p,u) m = multinomial.MultinomialFromUniform('float64')(p, u)
assert m.dtype == 'float64', m.dtype assert m.dtype == 'float64', m.dtype
def test_gpu_opt(): def test_gpu_opt():
if not cuda.cuda_available: if not cuda.cuda_available:
# Skip test if cuda_ndarray is not available. # Skip test if cuda_ndarray is not available.
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
raise SkipTest('Optional package cuda not available') raise SkipTest('Optional package cuda not available')
# We test the case where we put the op on the gpu when the output is moved to the gpu. # We test the case where we put the op on the gpu when the output
# is moved to the gpu.
p = tensor.fmatrix() p = tensor.fmatrix()
u = tensor.fvector() u = tensor.fvector()
m = multinomial.MultinomialFromUniform('auto')(p,u) m = multinomial.MultinomialFromUniform('auto')(p, u)
assert m.dtype == 'float32', m.dtype assert m.dtype == 'float32', m.dtype
m_gpu = cuda.gpu_from_host(m) m_gpu = cuda.gpu_from_host(m)
f = function([p,u], m_gpu, allow_input_downcast=True, mode=get_mode(True)) f = function([p, u], m_gpu, allow_input_downcast=True, mode=get_mode(True))
assert any([type(node.op) is multinomial.GpuMultinomialFromUniform for node in f.maker.fgraph.toposort()]) assert any([type(node.op) is multinomial.GpuMultinomialFromUniform
for node in f.maker.fgraph.toposort()])
pval = numpy.arange(10000 * 4, dtype='float32').reshape((10000, 4))+0.1 pval = numpy.arange(10000 * 4, dtype='float32').reshape((10000, 4))+0.1
pval = pval / pval.sum(axis=1)[:,None] pval = pval / pval.sum(axis=1)[:, None]
uval = numpy.ones_like(pval[:,0]) * 0.5 uval = numpy.ones_like(pval[:, 0]) * 0.5
mval = f(pval,uval) mval = f(pval, uval)
# Test with a row, it was failing in the past.
r = tensor.frow()
m = multinomial.MultinomialFromUniform('auto')(r, u)
assert m.dtype == 'float32', m.dtype
m_gpu = cuda.gpu_from_host(m)
f = function([r, u], m_gpu, allow_input_downcast=True, mode=get_mode(True))
assert any([type(node.op) is multinomial.GpuMultinomialFromUniform
for node in f.maker.fgraph.toposort()])
pval = numpy.arange(1 * 4, dtype='float32').reshape((1, 4))+0.1
pval = pval / pval.sum(axis=1)[:, None]
uval = numpy.ones_like(pval[:, 0]) * 0.5
mval2 = f(pval, uval)
...@@ -4656,9 +4656,10 @@ def local_elemwise_fusion_op(OP, max_input_fct=lambda node: 1024): ...@@ -4656,9 +4656,10 @@ def local_elemwise_fusion_op(OP, max_input_fct=lambda node: 1024):
else: else:
s = scalar.Scalar(i.dtype).make_variable() s = scalar.Scalar(i.dtype).make_variable()
try: try:
if theano.config.compute_test_value != 'off':
v = gof.op.get_test_value(i) v = gof.op.get_test_value(i)
if v.size > 0: if v.size > 0:
s.tag.test_value = gof.op.get_test_value(i).flatten()[0] s.tag.test_value = v.flatten()[0]
except AttributeError: except AttributeError:
pass pass
......
...@@ -1146,7 +1146,7 @@ class TestAdvancedSubtensor(unittest.TestCase): ...@@ -1146,7 +1146,7 @@ class TestAdvancedSubtensor(unittest.TestCase):
subt = self.m[self.ix1, self.ix12] subt = self.m[self.ix1, self.ix12]
a = inc_subtensor(subt, subt) a = inc_subtensor(subt, subt)
typ = TensorType(self.m.type.dtype, self.ix2.type.broadcastable) typ = tensor.TensorType(self.m.type.dtype, self.ix2.type.broadcastable)
assert a.type == typ, (a.type, typ) assert a.type == typ, (a.type, typ)
f = theano.function([self.m, self.ix1, self.ix12], a, f = theano.function([self.m, self.ix1, self.ix12], a,
allow_input_downcast=True) allow_input_downcast=True)
......
from nose.plugins.skip import SkipTest
import unittest import unittest
import theano
import numpy import numpy
import random
import numpy.random import numpy.random
import theano
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
''' '''
Different tests that are not connected to any particular Op, or functionality of Different tests that are not connected to any particular Op, or
Theano. Here will go for example code that we will publish in papers, that we functionality of Theano. Here will go for example code that we will
should ensure that it will remain operational publish in papers, that we should ensure that it will remain
operational
''' '''
class T_scipy(unittest.TestCase): class T_scipy(unittest.TestCase):
def setUp(self): def setUp(self):
utt.seed_rng() utt.seed_rng()
self.orig_floatX = theano.config.floatX self.orig_floatX = theano.config.floatX
def tearDown(self): def tearDown(self):
theano.config.floatX = self.orig_floatX theano.config.floatX = self.orig_floatX
def test_scipy_paper_example1(self): def test_scipy_paper_example1(self):
a = theano.tensor.vector('a') # declare variable a = theano.tensor.vector('a') # declare variable
b = a + a**10 # build expression b = a + a**10 # build expression
f = theano.function([a], b) # compile function f = theano.function([a], b) # compile function
assert numpy.all(f([0,1,2]) == numpy.array([0,2,1026])) assert numpy.all(f([0, 1, 2]) == numpy.array([0, 2, 1026]))
def test_scipy_paper_example2(self): def test_scipy_paper_example2(self):
''' This just sees if things compile well and if they run ''' ''' This just sees if things compile well and if they run '''
...@@ -34,7 +36,7 @@ class T_scipy(unittest.TestCase): ...@@ -34,7 +36,7 @@ class T_scipy(unittest.TestCase):
shared = theano.shared shared = theano.shared
function = theano.function function = theano.function
rng = numpy.random rng = numpy.random
theano.config.floatX='float64' theano.config.floatX = 'float64'
# #
# ACTUAL SCRIPT FROM PAPER # ACTUAL SCRIPT FROM PAPER
...@@ -49,18 +51,18 @@ class T_scipy(unittest.TestCase): ...@@ -49,18 +51,18 @@ class T_scipy(unittest.TestCase):
xent = -y*T.log(p_1) - (1-y)*T.log(1-p_1) xent = -y*T.log(p_1) - (1-y)*T.log(1-p_1)
prediction = p_1 > 0.5 prediction = p_1 > 0.5
cost = xent.mean() + 0.01*(w**2).sum() cost = xent.mean() + 0.01*(w**2).sum()
gw,gb = T.grad(cost, [w,b]) gw, gb = T.grad(cost, [w, b])
# Compile expressions to functions # Compile expressions to functions
train = function( train = function(
inputs=[x,y], inputs=[x, y],
outputs=[prediction, xent], outputs=[prediction, xent],
updates=[(w, w-0.1*gw), (b, b-0.1*gb)]) updates=[(w, w-0.1*gw), (b, b-0.1*gb)])
predict = function(inputs=[x], outputs=prediction) predict = function(inputs=[x], outputs=prediction)
N = 4 N = 4
feats = 100 feats = 100
D = (rng.randn(N, feats), rng.randint(size=4,low=0, high=2)) D = (rng.randn(N, feats), rng.randint(size=4, low=0, high=2))
training_steps = 10 training_steps = 10
for i in range(training_steps): for i in range(training_steps):
pred, err = train(D[0], D[1]) pred, err = train(D[0], D[1])
...@@ -68,4 +70,3 @@ class T_scipy(unittest.TestCase): ...@@ -68,4 +70,3 @@ class T_scipy(unittest.TestCase):
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论