提交 e75ca8a0 authored 作者: lamblin's avatar lamblin

Merge pull request #911 from nouiz/mixed

Mixed
......@@ -20,6 +20,7 @@ import logging
_logger = logging.getLogger('theano.bin.theano-nose')
_logger.setLevel(logging.WARN)
import os
import nose
import textwrap
import sys
......@@ -32,6 +33,23 @@ def main():
import theano
sys.argv[i] = theano.__path__[0]
# Many Theano tests assume device=cpu, so we need to raise an
# error if device==gpu.
# I don't know how to do this check only when theano-nose is run on
# Theano's own tests, so it is wrapped in a try..except in case the
# script gets reused elsewhere.
# We should not import theano before calling nose.main(),
# as that causes import problems with nosetests.
# Should we find a way to avoid modifying sys.path?
if not os.path.exists('theano/__init__.py'):
try:
from theano import config
if config.device != "cpu":
raise ValueError("Theano tests must be run with device=cpu."
" This will also run GPU tests when possible.")
except ImportError:
pass
# Handle --batch[=n] arguments
batch_args = [arg for arg in sys.argv if arg.startswith('--batch')]
for arg in batch_args:
......
......@@ -506,3 +506,4 @@ Other tools that can help you
* `memory_profiler <http://fseoane.net/blog/2012/line-by-line-report-of-memory-usage/>`_: memory profiler
* `runsnake <http://www.vrplumber.com/programming/runsnakerun/>`_: Gui for cProfile(time profiler) and Meliae(memory profiler)
* `hub <https://github.com/defunkt/hub>`_: A tool that adds github commands to the git command line.
* `git pull-requests <http://www.splitbrain.org/blog/2011-06/19-automate_github_pull_requests>`_: Another tool for git/github command line.
......@@ -51,6 +51,7 @@ installation and configuration, see :ref:`installing Theano <install>`.
Master Tests Status:
.. image:: https://secure.travis-ci.org/Theano/Theano.png
:target: http://travis-ci.org/Theano/Theano/builds
.. _available on PyPI: http://pypi.python.org/pypi/Theano
.. _Related Projects: https://github.com/Theano/Theano/wiki/Related-projects
......
......@@ -206,6 +206,7 @@ Bleeding-edge install instructions
Master Tests Status:
.. image:: https://secure.travis-ci.org/Theano/Theano.png
:target: http://travis-ci.org/Theano/Theano/builds
If you are a developer of Theano, then check out the :ref:`dev_start_guide`.
......
......@@ -161,6 +161,14 @@ import theano and print the config variable, as in:
Theano initializes the GPU device. Newer versions of PyCUDA
(currently only in the trunk) don't have this restriction.
.. attribute:: config.print_active_device
Bool value: either ``True`` or ``False``
Default: ``True``
Print the active device when the GPU device is initialized.
.. attribute:: floatX
String value: either 'float64' or 'float32'
......
......@@ -280,6 +280,13 @@ class Function(object):
A Function instance may be serialized using the `pickle` or `cPickle` modules.
This will save all default inputs, the graph, and *** to the pickle file (WRITEME).
A Function instance has a ``trust_input`` field that defaults to
False. When True, the extra input checks that produce better error
messages are skipped. In some cases the Python code will still
return correct results if you pass a Python or numpy scalar instead
of a numpy tensor, but the C code should raise an error if you pass
an object of the wrong type.
"""
pickle_aliased_memory_strategy = 'warn'
......@@ -351,12 +358,12 @@ class Function(object):
It maps container -> SymbolicInput
"""
def __init__(self, fn, input_storage, output_storage, indices, outputs, defaults, unpack_single, return_none, maker):
def __init__(self, fn, input_storage, output_storage, indices, outputs,
defaults, unpack_single, return_none, maker):
"""
Initialize attributes. create finder, inv_finder.
"""
self.fn = fn
self.input_storage = input_storage
self.output_storage = output_storage
......@@ -366,7 +373,8 @@ class Function(object):
self.unpack_single = unpack_single
self.return_none = return_none
self.maker = maker
self.profile = None # reassigned in FunctionMaker.create
self.profile = None # reassigned in FunctionMaker.create
self.trust_input = False # If True, we don't check the input parameter
# We will be popping stuff off this `containers` object. It is a copy.
containers = list(self.input_storage)
......@@ -487,7 +495,8 @@ class Function(object):
except KeyError:
# Print informative error message.
msg = get_info_on_inputs(named_inputs, n_unnamed_inputs)
raise TypeError("Unknown input or state: %s. %s" % (str(item), msg))
raise TypeError("Unknown input or state: %s. %s" %
(str(item), msg))
if s is DUPLICATE:
raise TypeError("Ambiguous name: %s - please check the names "\
"of the inputs of your function for duplicates." % str(item))
......@@ -531,11 +540,12 @@ class Function(object):
def __setitem__(self, item, value):
    # Dictionary-style assignment: store `value` under key `item`
    # through the dictionary-like `self.value` interface.
    self.value[item] = value
def __copy__(self):
defaults = [default for _1, _2, default in self.defaults]
cpy = self.maker.create(defaults, trustme = True)
for (input,_1,_2), here, there in zip(self.indices, self.input_storage, cpy.input_storage):
cpy = self.maker.create(defaults, trustme=True)
for (input, _1, _2), here, there in zip(self.indices,
self.input_storage,
cpy.input_storage):
if input.mutable and here is not None:
there.data = copy.copy(here.data)
else:
......@@ -547,54 +557,62 @@ class Function(object):
t0 = time.time()
# Reinitialize each container's 'provided' counter
for c in self.input_storage:
c.provided = 0
if len(args)+len(kwargs)>len(self.input_storage):
raise TypeError("Too many parameter passed to theano function")
# Set positional arguments
i = 0
for arg in args:
#TODO: provide a Param option for skipping the filter if we
# really want speed.
s = self.input_storage[i]
# see this email thread for a discussion about None as input
# https://groups.google.com/group/theano-dev/browse_thread/thread/920a5e904e8a8525/4f1b311a28fc27e5
if arg is None:
if self.trust_input:
i = 0
for arg in args:
s = self.input_storage[i]
s.storage[0] = arg
else:
try:
s.storage[0] = s.type.filter(arg, strict=s.strict,
allow_downcast=s.allow_downcast)
except Exception, e:
function_name="theano function"
if self.name:
function_name += 'with name "'+self.name+'" '
#end if
e.args = tuple(["Bad input argument to " + function_name +
" at index %d(0-based)" % i] + list(e.args))
raise
#end except
#end if
s.provided += 1
i+=1
i += 1
else:
for c in self.input_storage:
c.provided = 0
if len(args) + len(kwargs) > len(self.input_storage):
raise TypeError("Too many parameter passed to theano function")
# Set positional arguments
i = 0
for arg in args:
#TODO: provide a Param option for skipping the filter if we
# really want speed.
s = self.input_storage[i]
# see this email thread for a discussion about None as input
# https://groups.google.com/group/theano-dev/browse_thread/thread/920a5e904e8a8525/4f1b311a28fc27e5
if arg is None:
s.storage[0] = arg
else:
try:
s.storage[0] = s.type.filter(arg, strict=s.strict,
allow_downcast=s.allow_downcast)
except Exception, e:
function_name = "theano function"
if self.name:
function_name += 'with name "' + self.name + '" '
#end if
e.args = tuple(["Bad input argument to " + function_name +
" at index %d(0-based)" % i] +
list(e.args))
raise
#end except
#end if
s.provided += 1
i += 1
# Set keyword arguments
if kwargs: # for speed, skip the iteritems for empty kwargs
for k, arg in kwargs.iteritems():
self[k] = arg
if (not hasattr(self, '_check_for_aliased_inputs') or
if not self.trust_input and (
not hasattr(self, '_check_for_aliased_inputs') or
self._check_for_aliased_inputs):
## Collect aliased inputs among the storage space
args_share_memory = []
for i in xrange(len(self.input_storage)):
i_var = self.maker.inputs[i].variable
i_val = self.input_storage[i].storage[0]
if hasattr( i_var.type, 'may_share_memory'):
if hasattr(i_var.type, 'may_share_memory'):
is_aliased = False
for j in xrange(len(args_share_memory)):
......@@ -603,9 +621,9 @@ class Function(object):
in args_share_memory[j]],
[self.input_storage[k].storage[0] for k
in args_share_memory[j]])
if numpy.any([ (var.type is i_var.type and
var.type.may_share_memory(val,i_val)
) for (var,val) in group_j]):
if numpy.any([(var.type is i_var.type and
var.type.may_share_memory(val,i_val))
for (var,val) in group_j]):
is_aliased = True
args_share_memory[j].append(i)
......@@ -619,27 +637,30 @@ class Function(object):
if len(group) > 1:
# see if any of these arguments are mutable
mutable = numpy.any([(self.maker.inputs[idx].mutable or
self.maker.inputs[idx].borrow )
for idx in group ])
self.maker.inputs[idx].borrow)
for idx in group])
# copy all but the first
for idx in group[1:]:
self.input_storage[i].storage[0] = copy.copy(
self.input_storage[i].storage[0])
# Check if inputs are missing, or if inputs were set more than once, or
# if we tried to provide inputs that are supposed to be implicit.
for c in self.input_storage:
if c.required and not c.provided:
raise TypeError("Missing required input: %s" % getattr(self.inv_finder[c], 'variable', self.inv_finder[c]))
if c.provided > 1:
raise TypeError("Multiple values for input: %s" % getattr(self.inv_finder[c], 'variable', self.inv_finder[c]))
if c.implicit and c.provided > 0:
raise TypeError('Tried to provide value for implicit input: %s'
if not self.trust_input:
for c in self.input_storage:
if c.required and not c.provided:
raise TypeError("Missing required input: %s" %
getattr(self.inv_finder[c], 'variable',
self.inv_finder[c]))
if c.provided > 1:
raise TypeError("Multiple values for input: %s" %
getattr(self.inv_finder[c], 'variable',
self.inv_finder[c]))
if c.implicit and c.provided > 0:
raise TypeError(
'Tried to provide value for implicit input: %s'
% getattr(self.inv_finder[c], 'variable',
self.inv_finder[c]))
self.inv_finder[c]))
# Do the actual work
t0_fn = time.time()
......@@ -671,11 +692,12 @@ class Function(object):
if c.required:
c.storage[0] = None
# if we are allowing garbage collection, remove the input and output reference from the internal
# storage cells
# if we are allowing garbage collection, remove the input and
# output reference from the internal storage cells
if getattr(self.fn, 'allow_gc', False):
assert len(self.output_storage) == len(self.maker.fgraph.outputs)
for o_container, o_variable in zip(self.output_storage, self.maker.fgraph.outputs):
for o_container, o_variable in zip(self.output_storage,
self.maker.fgraph.outputs):
if o_variable.owner is not None:
# this node is the variable of computation
# WARNING: This circumvents the 'readonly' attribute in x
......@@ -683,7 +705,8 @@ class Function(object):
if getattr(self.fn, 'need_update_inputs', True):
# Update the inputs that have an update function
for input, storage in reversed(zip(self.maker.expanded_inputs, self.input_storage)):
for input, storage in reversed(zip(self.maker.expanded_inputs,
self.input_storage)):
if input.update is not None:
storage.data = outputs.pop()
else:
......@@ -718,15 +741,16 @@ class Function(object):
value = property(
lambda self: self._value,
None, # this property itself is not settable
doc="""dictionary-like access to the values associated with Variables""")
None, # this property itself is not settable
doc="dictionary-like access to the values associated with Variables")
container = property(
lambda self: self._container,
None, # this property itself is not settable
None, # this property itself is not settable
doc="""dictionary-like access to the containers associated with Variables""")
# pickling/deepcopy support for Function
def _pickle_Function(f):
#copy of the input storage list
ins = list(f.input_storage)
......
......@@ -76,7 +76,7 @@ AddConfigVar('force_device',
in_c_key=False)
AddConfigVar('print_active_device',
"Print active device at startup",
"Print active device at when the GPU device is initialized.",
BoolParam(True, allow_override=False),
in_c_key=False)
......
......@@ -1362,7 +1362,8 @@ class _CThunk(object):
# this can be used to retrieve the location the Op was declared
exc_value.__thunk_trace__ = trace
except Exception:
print >> sys.stderr, 'ERROR retrieving error_storage',
print >> sys.stderr, ('ERROR retrieving error_storage.'
' Was the error set in the c code?'),
print >> sys.stderr, self.error_storage
raise
......
......@@ -53,8 +53,8 @@ if [ "$RELEASE" ]; then
ls ${COMPILEDIR}|wc -l
fi
echo "Executing nosetests with mode=FAST_COMPILE"
THEANO_FLAGS=${FLAGS},mode=FAST_COMPILE ${NOSETESTS} ${ARGS}
echo "Executing nosetests with mode=FAST_COMPILE with --batch=1000"
THEANO_FLAGS=${FLAGS},mode=FAST_COMPILE ${NOSETESTS} --batch=1000 ${ARGS}
echo "Number of elements in the compiledir:"
ls ${COMPILEDIR}|wc -l
......
......@@ -86,8 +86,9 @@ class HostFromGpu(GpuOp):
fail = sub['fail']
return """
%(out)s = (PyArrayObject *) CudaNdarray_CreateArrayObj(%(inp)s);
if(!%(out)s)
if(!%(out)s){
%(fail)s;
}
""" % locals()
def c_code_cache_version(self):
......@@ -133,6 +134,27 @@ class GpuFromHost(GpuOp):
def infer_shape(self, node, xshp):
return xshp
def c_code(self, node, name, inputs, outputs, sub):
    """Return C code that copies a host numpy array to the GPU.

    Allocates a fresh CudaNdarray for the output and copies the
    input host array into it, jumping to the `fail` snippet if
    either the allocation or the copy reports an error.
    """
    inp = inputs[0]
    out = outputs[0]
    fail = sub['fail']
    return """
    int err = 0;
    Py_XDECREF(%(out)s);
    %(out)s = (CudaNdarray*) CudaNdarray_New();
    if(!%(out)s){
        %(fail)s;
    }
    err = CudaNdarray_CopyFromArray(%(out)s, %(inp)s);
    if(err){
        %(fail)s;
    }
    """ % locals()
def c_code_cache_version(self):
    # Cache version for the generated C code above; bump when the
    # C implementation changes.
    return (1,)
gpu_from_host = GpuFromHost()
......@@ -1898,16 +1920,19 @@ class GpuSubtensor(tensor.Subtensor, GpuOp):
assert isinstance(x.type, CudaNdarrayType)
rval = tensor.Subtensor.make_node(self, x, *inputs)
otype = CudaNdarrayType(rval.outputs[0].type.broadcastable)
return Apply(self, [x] + rval.inputs[1:], [otype()])
#We reverse the index here as a speed optimization;
#this opt saved 0.40e-05s out of 3.49e05s
return Apply(self, [x] + list(reversed(rval.inputs[1:])), [otype()])
def perform(self, node, inputs, out_):
out, = out_
x = inputs[0]
indices = list(reversed(inputs[1:]))
indices = inputs[1:]
def convert(entry):
if isinstance(entry, Type):
rval = indices.pop()
#the if takes about .25e-05s
if sys.version_info < (2, 5):
# Before Python 2.5, PySlice_GetIndicesEx requires
# Python int to be passed.
......
......@@ -175,6 +175,12 @@ class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp):
'self.odtype == pvals.dtype', odtype, pvals.dtype)
return Apply(self, [pvals, unis], [pvals.type()])
def perform(self, node, ins, outs):
    # The parent class's perform() does not work with CudaNdarray
    # inputs, and a GPU-specific Python implementation is not
    # needed: this op is only inserted by an optimization, so
    # DebugMode will still test it there. Delegate to the base
    # Op.perform instead.
    return Op.perform(self, node, ins, outs)
def c_code_cache_version(self):
    # Cache version of this op's C implementation; bump when the
    # C code changes.
    return (8,)
......
......@@ -5,7 +5,6 @@ TODO: implement Images2Neibs.{perform,infer_shape}() methods
import theano
from theano import Op, Apply
import theano.tensor as T
from theano.gof import local_optimizer
from theano.gradient import grad_not_implemented
......
......@@ -1193,7 +1193,7 @@ class UsmmTests(unittest.TestCase):
theano.tensor.basic.float64_atol = orig_atol
theano.tensor.basic.float64_rtol = orig_rtol
assert _allclose(f_a_out, f_b_out, rtol=1e-5)
assert _allclose(f_a_out, f_b_out, rtol=1e-5), (f_a_out, f_b_out)
topo = f_a.maker.fgraph.toposort()
up = theano.scalar.upcast(dtype1, dtype2, dtype3, dtype4)
......
......@@ -246,7 +246,8 @@ class Gemv(Op):
gemv_no_inplace = Gemv(inplace=False)
gemv_inplace = Gemv(inplace=True)
# For the user interface. Opt will make them inplace later
gemv = gemv_no_inplace
class Ger(Op):
"""
......@@ -991,6 +992,8 @@ class Gemm(GemmRelated):
gemm_inplace = Gemm(inplace=True)
gemm_no_inplace = Gemm(inplace=False)
# For the user interface. Theano optimization will make them inplace
gemm = gemm_no_inplace
pprint.assign(gemm_inplace, FunctionPrinter('gemm_inplace'))
pprint.assign(gemm_no_inplace, FunctionPrinter('gemm_no_inplace'))
......
......@@ -1409,6 +1409,7 @@ GammaTester = makeBroadcastTester(
good=_good_broadcast_unary_gammaln,
grad=_grad_broadcast_unary_gammaln,
mode=mode_no_scipy,
eps=1e-5,
skip=skip_scipy)
GammaInplaceTester = makeBroadcastTester(
op=inplace.gamma_inplace,
......@@ -1416,6 +1417,7 @@ GammaInplaceTester = makeBroadcastTester(
good=_good_broadcast_unary_gammaln,
grad=_grad_broadcast_unary_gammaln,
mode=mode_no_scipy,
eps=1e-5,
inplace=True,
skip=skip_scipy)
......
......@@ -3736,7 +3736,7 @@ class Test_lift_transpose_through_dot(unittest.TestCase):
def test_local_upcast_elemwise_constant_inputs():
s = dvector("s")
x = tensor.sum(tensor.log(10**s))
x = tensor.sum(tensor.log(10 ** s))
f = function([s], [tensor.grad(x, s)])
f([-42, -2.1, -1, -0.5, 0, 0.2, 1, 2, 12])
......@@ -3748,22 +3748,22 @@ class TestShape_i(utt.InferShapeTester):
def test_perform(self):
advec = dvector()
advec_val = numpy.random.rand(3)
advec = vector()
advec_val = numpy.random.rand(3).astype(config.floatX)
f = function([advec], Shape_i(0)(advec))
out = f(advec_val)
assert numpy.allclose(out, advec_val.shape[0])
admat = dmatrix()
admat_val = numpy.random.rand(4, 3)
admat = matrix()
admat_val = numpy.random.rand(4, 3).astype(config.floatX)
for i in xrange(2):
f = function([admat], Shape_i(i)(admat))
out = f(admat_val)
assert numpy.allclose(out, admat_val.shape[i])
def test_infer_shape(self):
admat = dmatrix()
admat_val = numpy.random.rand(3, 4)
admat = matrix()
admat_val = numpy.random.rand(3, 4).astype(config.floatX)
self._compile_and_check([admat], [Shape_i(0)(admat)],
[admat_val], Shape_i)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论