Commit e75ca8a0 authored by lamblin

Merge pull request #911 from nouiz/mixed

Mixed
@@ -20,6 +20,7 @@ import logging
 _logger = logging.getLogger('theano.bin.theano-nose')
 _logger.setLevel(logging.WARN)
+import os
 import nose
 import textwrap
 import sys
@@ -32,6 +33,23 @@ def main():
         import theano
         sys.argv[i] = theano.__path__[0]
+    # Many Theano tests assume device=cpu, so we raise an error if
+    # device==gpu. There is no easy way to run this check only when
+    # theano-nose is used on Theano's own tests, so the check is
+    # wrapped in try..except in case the script gets reused elsewhere.
+    # We must not import theano before calling nose.main(), as that
+    # causes import problems with nosetests.
+    # Should we find a way to avoid modifying sys.path?
+    if not os.path.exists('theano/__init__.py'):
+        try:
+            from theano import config
+            if config.device != "cpu":
+                raise ValueError("Theano tests must be run with device=cpu."
+                                 " This will also run GPU tests when possible.")
+        except ImportError:
+            pass
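For illustration, a minimal standalone sketch of how this guard behaves (hypothetical code, not part of the commit; THEANO_FLAGS is read once, when theano is first imported):

    import os
    os.environ['THEANO_FLAGS'] = 'device=cpu'  # must be set before importing theano
    import theano
    assert theano.config.device == 'cpu'  # the guard above would pass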
     # Handle --batch[=n] arguments
     batch_args = [arg for arg in sys.argv if arg.startswith('--batch')]
     for arg in batch_args:
......
@@ -506,3 +506,4 @@ Other tools that can help you
 * `memory_profiler <http://fseoane.net/blog/2012/line-by-line-report-of-memory-usage/>`_: memory profiler
 * `runsnake <http://www.vrplumber.com/programming/runsnakerun/>`_: GUI for cProfile (time profiler) and Meliae (memory profiler)
 * `hub <https://github.com/defunkt/hub>`_: A tool that adds GitHub commands to the git command line.
+* `git pull-requests <http://www.splitbrain.org/blog/2011-06/19-automate_github_pull_requests>`_: Another tool for the git/GitHub command line.
@@ -51,6 +51,7 @@ installation and configuration, see :ref:`installing Theano <install>`.
 Master Tests Status:
 .. image:: https://secure.travis-ci.org/Theano/Theano.png
+   :target: http://travis-ci.org/Theano/Theano/builds
 .. _available on PyPI: http://pypi.python.org/pypi/Theano
 .. _Related Projects: https://github.com/Theano/Theano/wiki/Related-projects
......
@@ -206,6 +206,7 @@ Bleeding-edge install instructions
 Master Tests Status:
 .. image:: https://secure.travis-ci.org/Theano/Theano.png
+   :target: http://travis-ci.org/Theano/Theano/builds
 If you are a developer of Theano, then check out the :ref:`dev_start_guide`.
......
@@ -161,6 +161,14 @@ import theano and print the config variable, as in:
     Theano initializes the GPU device. Newer versions of PyCUDA
     (currently only in the trunk) don't have this restriction.
+.. attribute:: config.print_active_device
+
+    Bool value: either ``True`` or ``False``
+
+    Default: ``True``
+
+    Print the active device when the GPU device is initialized.
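For example, a minimal sketch of silencing the device banner (hypothetical snippet; the flag must be set before theano is imported, since it cannot be overridden afterwards):

    import os
    os.environ['THEANO_FLAGS'] = 'print_active_device=False'
    import theano  # no "Using gpu device ..." line when the GPU initializes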
 .. attribute:: floatX

     String value: either 'float64' or 'float32'
......
@@ -76,7 +76,7 @@ AddConfigVar('force_device',
         in_c_key=False)
 AddConfigVar('print_active_device',
-        "Print active device at startup",
+        "Print the active device when the GPU device is initialized.",
         BoolParam(True, allow_override=False),
         in_c_key=False)
......
@@ -1362,7 +1362,8 @@ class _CThunk(object):
             # this can be used to retrieve the location the Op was declared
             exc_value.__thunk_trace__ = trace
         except Exception:
-            print >> sys.stderr, 'ERROR retrieving error_storage',
+            print >> sys.stderr, ('ERROR retrieving error_storage.'
+                                  ' Was the error set in the c code?'),
             print >> sys.stderr, self.error_storage
             raise
......
@@ -53,8 +53,8 @@ if [ "$RELEASE" ]; then
     ls ${COMPILEDIR}|wc -l
 fi
-echo "Executing nosetests with mode=FAST_COMPILE"
-THEANO_FLAGS=${FLAGS},mode=FAST_COMPILE ${NOSETESTS} ${ARGS}
+echo "Executing nosetests with mode=FAST_COMPILE and --batch=1000"
+THEANO_FLAGS=${FLAGS},mode=FAST_COMPILE ${NOSETESTS} --batch=1000 ${ARGS}
 echo "Number of elements in the compiledir:"
 ls ${COMPILEDIR}|wc -l
......
@@ -86,8 +86,9 @@ class HostFromGpu(GpuOp):
         fail = sub['fail']
         return """
         %(out)s = (PyArrayObject *) CudaNdarray_CreateArrayObj(%(inp)s);
-        if(!%(out)s)
+        if(!%(out)s){
             %(fail)s;
+        }
         """ % locals()
     def c_code_cache_version(self):
@@ -133,6 +134,27 @@ class GpuFromHost(GpuOp):
     def infer_shape(self, node, xshp):
         return xshp
+    def c_code(self, node, name, inputs, outputs, sub):
+        inp = inputs[0]
+        out = outputs[0]
+        fail = sub['fail']
+        return """
+        int err = 0;
+        Py_XDECREF(%(out)s);
+        %(out)s = (CudaNdarray*) CudaNdarray_New();
+        if(!%(out)s){
+            %(fail)s;
+        }
+        err = CudaNdarray_CopyFromArray(%(out)s, %(inp)s);
+        if(err){
+            %(fail)s;
+        }
+        """ % locals()
+
+    def c_code_cache_version(self):
+        return (1,)
 gpu_from_host = GpuFromHost()
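A minimal usage sketch of the transfer Ops defined above (assuming theano.sandbox.cuda is available and a GPU is configured; not part of the commit):

    import theano
    import theano.tensor as T
    from theano.sandbox.cuda.basic_ops import gpu_from_host, host_from_gpu

    x = T.fmatrix('x')            # float32 host tensor
    gx = gpu_from_host(x)         # host ndarray -> CudaNdarray (uses the c_code above)
    y = host_from_gpu(gx)         # CudaNdarray -> host ndarray
    f = theano.function([x], y)   # a round trip through GPU memory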
@@ -1898,16 +1920,19 @@ class GpuSubtensor(tensor.Subtensor, GpuOp):
         assert isinstance(x.type, CudaNdarrayType)
         rval = tensor.Subtensor.make_node(self, x, *inputs)
         otype = CudaNdarrayType(rval.outputs[0].type.broadcastable)
-        return Apply(self, [x] + rval.inputs[1:], [otype()])
+        # We reverse the indices here as a speed optimization;
+        # this saved 0.40e-05s out of 3.49e-05s.
+        return Apply(self, [x] + list(reversed(rval.inputs[1:])), [otype()])
     def perform(self, node, inputs, out_):
         out, = out_
         x = inputs[0]
-        indices = list(reversed(inputs[1:]))
+        indices = inputs[1:]
         def convert(entry):
             if isinstance(entry, Type):
                 rval = indices.pop()
+                # This version check takes about .25e-05s.
                 if sys.version_info < (2, 5):
                     # Before Python 2.5, PySlice_GetIndicesEx requires
                     # a Python int to be passed.
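For context, a minimal standalone sketch of the list trick used above (hypothetical code): the index inputs are stored reversed at graph-construction time so that perform() can consume them with list.pop(), which is O(1) at the tail of a Python list, instead of building a reversed copy on every call:

    indices = [10, 20, 30]            # logical order of the index inputs
    stored = list(reversed(indices))  # done once, in make_node
    # in perform(), pop() yields them back in the original order:
    assert [stored.pop() for _ in range(3)] == [10, 20, 30]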
......
@@ -175,6 +175,12 @@ class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp):
                 'self.odtype == pvals.dtype', odtype, pvals.dtype)
         return Apply(self, [pvals, unis], [pvals.type()])
+    def perform(self, node, ins, outs):
+        # The parent's perform() does not work with CudaNdarray. We
+        # don't need it either, as DebugMode will still test against
+        # the CPU version when an optimization inserts the GPU op.
+        return Op.perform(self, node, ins, outs)
     def c_code_cache_version(self):
         return (8,)
......
@@ -5,7 +5,6 @@ TODO: implement Images2Neibs.{perform,infer_shape}() methods
 import theano
 from theano import Op, Apply
 import theano.tensor as T
-from theano.gof import local_optimizer
 from theano.gradient import grad_not_implemented
......
@@ -1193,7 +1193,7 @@ class UsmmTests(unittest.TestCase):
         theano.tensor.basic.float64_atol = orig_atol
         theano.tensor.basic.float64_rtol = orig_rtol
-        assert _allclose(f_a_out, f_b_out, rtol=1e-5)
+        assert _allclose(f_a_out, f_b_out, rtol=1e-5), (f_a_out, f_b_out)
         topo = f_a.maker.fgraph.toposort()
         up = theano.scalar.upcast(dtype1, dtype2, dtype3, dtype4)
......
@@ -246,7 +246,8 @@ class Gemv(Op):
 gemv_no_inplace = Gemv(inplace=False)
 gemv_inplace = Gemv(inplace=True)
+# For the user interface. The optimizer will make it inplace later.
+gemv = gemv_no_inplace
 class Ger(Op):
     """
@@ -991,6 +992,8 @@ class Gemm(GemmRelated):
 gemm_inplace = Gemm(inplace=True)
 gemm_no_inplace = Gemm(inplace=False)
+# For the user interface. Theano's optimizations will make it inplace later.
+gemm = gemm_no_inplace
 pprint.assign(gemm_inplace, FunctionPrinter('gemm_inplace'))
 pprint.assign(gemm_no_inplace, FunctionPrinter('gemm_no_inplace'))
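A minimal usage sketch of the user-facing symbol (assuming the call signature gemm(z, a, x, y, b), computing roughly b * z + a * dot(x, y); Theano's inplace optimizer may later substitute gemm_inplace when the output can safely reuse z's buffer):

    import theano
    import theano.tensor as T
    from theano.tensor.blas import gemm  # the non-inplace, user-facing Op

    Z, X, Y = T.dmatrix('Z'), T.dmatrix('X'), T.dmatrix('Y')
    a, b = T.dscalar('a'), T.dscalar('b')
    out = gemm(Z, a, X, Y, b)                  # b*Z + a*dot(X, Y), Z left untouched
    f = theano.function([Z, a, X, Y, b], out)  # the optimizer may swap in gemm_inplace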
......
@@ -1409,6 +1409,7 @@ GammaTester = makeBroadcastTester(
         good=_good_broadcast_unary_gammaln,
         grad=_grad_broadcast_unary_gammaln,
         mode=mode_no_scipy,
+        eps=1e-5,
         skip=skip_scipy)
 GammaInplaceTester = makeBroadcastTester(
     op=inplace.gamma_inplace,
@@ -1416,6 +1417,7 @@ GammaInplaceTester = makeBroadcastTester(
         good=_good_broadcast_unary_gammaln,
         grad=_grad_broadcast_unary_gammaln,
         mode=mode_no_scipy,
+        eps=1e-5,
         inplace=True,
         skip=skip_scipy)
......
@@ -3736,7 +3736,7 @@ class Test_lift_transpose_through_dot(unittest.TestCase):
 def test_local_upcast_elemwise_constant_inputs():
     s = dvector("s")
-    x = tensor.sum(tensor.log(10**s))
+    x = tensor.sum(tensor.log(10 ** s))
     f = function([s], [tensor.grad(x, s)])
     f([-42, -2.1, -1, -0.5, 0, 0.2, 1, 2, 12])
@@ -3748,22 +3748,22 @@ class TestShape_i(utt.InferShapeTester):
     def test_perform(self):
-        advec = dvector()
-        advec_val = numpy.random.rand(3)
+        advec = vector()
+        advec_val = numpy.random.rand(3).astype(config.floatX)
         f = function([advec], Shape_i(0)(advec))
         out = f(advec_val)
         assert numpy.allclose(out, advec_val.shape[0])
-        admat = dmatrix()
-        admat_val = numpy.random.rand(4, 3)
+        admat = matrix()
+        admat_val = numpy.random.rand(4, 3).astype(config.floatX)
         for i in xrange(2):
             f = function([admat], Shape_i(i)(admat))
             out = f(admat_val)
             assert numpy.allclose(out, admat_val.shape[i])
     def test_infer_shape(self):
-        admat = dmatrix()
-        admat_val = numpy.random.rand(3, 4)
+        admat = matrix()
+        admat_val = numpy.random.rand(3, 4).astype(config.floatX)
         self._compile_and_check([admat], [Shape_i(0)(admat)],
                                 [admat_val], Shape_i)
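The pattern these tests switch to, as a minimal sketch (symbolic types that follow config.floatX instead of hard-coded float64, so the tests also run under floatX=float32, e.g. on the GPU):

    import numpy
    from theano import config
    from theano.tensor import vector

    advec = vector()                                        # dtype follows config.floatX
    advec_val = numpy.random.rand(3).astype(config.floatX)  # keep the data in sync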
......