Commit 3a2e9988, authored by lamblin

Merge pull request #1116 from nouiz/paper

Tech report and uint*
......@@ -6,7 +6,7 @@ Theano is a Python library that allows you to define, optimize, and
evaluate mathematical expressions involving multi-dimensional
arrays efficiently. Theano features:
* **tight integration with numpy** -- Use `numpy.ndarray` in Theano-compiled functions.
* **tight integration with NumPy** -- Use `numpy.ndarray` in Theano-compiled functions.
* **transparent use of a GPU** -- Perform data-intensive calculations up to 140x faster than with a CPU (float32 only).
* **efficient symbolic differentiation** -- Theano does your derivatives for functions with one or many inputs.
* **speed and stability optimizations** -- Get the right answer for ``log(1+x)`` even when ``x`` is really tiny.
......@@ -17,12 +17,19 @@ Theano has been powering large-scale computationally intensive scientific invest
since 2007. But it is also approachable enough to be used in the classroom
(IFT6266 at the University of Montreal).
News
====
* New technical report on Theano: `Theano: new features and speed improvements <http://arxiv.org/abs/1211.5590>`_. Please cite the other paper below.
* Theano 0.6rc2 was released. Everybody is encouraged to update.
* `HPCS 2011 Tutorial <http://www.iro.umontreal.ca/~lisa/pointeurs/tutorial_hpcs2011_fixed.pdf>`_. It includes a few fixes discovered while doing the Tutorial.
.. image:: images/talk2010.png
:scale: 75%
:align: left
**NEW!** `HPCS 2011 Tutorial <http://www.iro.umontreal.ca/~lisa/pointeurs/tutorial_hpcs2011_fixed.pdf>`_. It includes a few fixes discovered while doing the Tutorial.
You can watch a quick (20 minute) introduction to Theano given as a talk at `SciPy 2010 <http://conference.scipy.org/scipy2010/>`_ via streaming (or downloaded) video:
`Transparent GPU Computing With Theano`_.
......
......@@ -166,8 +166,11 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None,
if isinstance(updates, dict) and \
not isinstance(updates, gof.python25.OrderedDict):
warnings.warn("Expected OrderedDict, got "+str(type(updates))+ "Using "
"a standard dictionary here results in "
warnings.warn(
"The parameter 'updates' of theano.function()"
" expect an OrderedDict,"
" got " + str(type(updates)) + "Using "
"a standard dictionary here results in "
"non-deterministic behavior. You should use an OrderedDict"
" if you are using python2.7 or use a list of (shared, update)"
" pairs. Do not just convert your dictionary to this type before"
......@@ -176,7 +179,8 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None,
if givens is None:
givens = []
if not isinstance(inputs, (list, tuple)):
raise Exception("Inputs variable of a Theano function should be contained in a list, even when there is a single input.")
raise Exception("Inputs variable of a Theano function should be"
" contained in a list, even when there is a single input.")
# compute some features of the arguments:
uses_In = any([isinstance(i, In) for i in inputs]) # N.B. the square brackets are ncessary
......
......@@ -54,10 +54,10 @@ class SoftmaxWithBias(gof.Op):
x = tensor.as_tensor_variable(x)
b = tensor.as_tensor_variable(b)
if x.type.ndim != 2 \
or x.type.dtype not in ['float32', 'float64']:
or x.type.dtype not in tensor.float_dtypes:
raise ValueError('x must be 2-d tensor of floats')
if b.type.ndim != 1 \
or x.type.dtype not in ['float32', 'float64']:
or x.type.dtype not in tensor.float_dtypes:
raise ValueError('b must be 1-d tensor of floats')
sm = x.type.make_variable()
......@@ -351,7 +351,7 @@ class Softmax(gof.Op):
def make_node(self, x):
x = tensor.as_tensor_variable(x)
if x.type.ndim not in (1, 2) \
or x.type.dtype not in ['float32', 'float64']:
or x.type.dtype not in tensor.float_dtypes:
raise ValueError('x must be 1-d or 2-d tensor of floats')
if x.ndim == 1:
x = tensor.shape_padleft(x, n_ones=1)
......@@ -746,14 +746,14 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
b = tensor.as_tensor_variable(b)
y_idx = tensor.as_tensor_variable(y_idx)
if x.type.ndim != 2 \
or x.type.dtype not in ['float32', 'float64']:
or x.type.dtype not in tensor.float_dtypes:
raise ValueError('x must be 2-d tensor of floats', x.type)
if b.type.ndim != 1 \
or x.type.dtype not in ['float32', 'float64']:
or x.type.dtype not in tensor.float_dtypes:
raise ValueError('b must be 1-d tensor of floats', b.type)
if y_idx.type.ndim != 1 \
or y_idx.type.dtype not in ['int8', 'int16', 'int32', 'int64']:
raise ValueError('y_idx must be 1-d tensor of ints', y_idx.type)
or y_idx.type.dtype not in tensor.discrete_dtypes:
raise ValueError('y_idx must be 1-d tensor of [u]ints', y_idx.type)
# TODO: Is this correct? It used to be y, not y_idx
nll = tensor.TensorType(x.type.dtype,
......@@ -884,15 +884,6 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
PyErr_SetString(PyExc_ValueError, "y_idx not 1d tensor");
%(fail)s;
}
if ((PyArray_DESCR(%(y_idx)s)->type_num != NPY_INT64)
&& (PyArray_DESCR(%(y_idx)s)->type_num != NPY_INT32)
&& (PyArray_DESCR(%(y_idx)s)->type_num != NPY_INT16)
&& (PyArray_DESCR(%(y_idx)s)->type_num != NPY_INT8))
{
PyErr_SetString(PyExc_TypeError,
"y_idx not int8, int16, int32, or int64");
%(fail)s;
}
if (PyArray_DIMS(%(x)s)[0] != PyArray_DIMS(%(y_idx)s)[0])
{
PyErr_Format(PyExc_ValueError,
......@@ -982,6 +973,15 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
dy = tensor.as_tensor_variable(dy)
sm = tensor.as_tensor_variable(sm)
y_idx = tensor.as_tensor_variable(y_idx)
if (dy.type.ndim != 1 or
dy.type.dtype not in tensor.float_dtypes):
raise ValueError('dy must be 1-d tensor of floats', dy.type)
if (sm.type.ndim != 2 or
sm.type.dtype not in tensor.float_dtypes):
raise ValueError('sm must be 2-d tensor of floats', sm.type)
if (y_idx.type.ndim != 1 or
y_idx.type.dtype not in tensor.discrete_dtypes):
raise ValueError('y_idx must be 1-d tensor of [u]ints', y_idx.type)
return Apply(self, [dy, sm, y_idx], [sm.type.make_variable()])
def perform(self, node, input_storage, output_storage):
......@@ -1012,7 +1012,7 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
return [g_dy, g_sm, g_y_idx]
def c_code_cache_version(self):
return (2,)
return (3,)
def c_code(self, node, name, inp, out, sub):
dnll, sm, y_idx = inp
......@@ -1034,15 +1034,6 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
"sm type should be float32 or float64");
%(fail)s;
}
if ((PyArray_DESCR(%(y_idx)s)->type_num != NPY_INT64)
&& (PyArray_DESCR(%(y_idx)s)->type_num != NPY_INT32)
&& (PyArray_DESCR(%(y_idx)s)->type_num != NPY_INT16)
&& (PyArray_DESCR(%(y_idx)s)->type_num != NPY_INT8))
{
PyErr_SetString(PyExc_TypeError,
"y_idx not int8, int16, int32, or int64");
%(fail)s;
}
if ((PyArray_NDIM(%(dnll)s) != 1)
|| (PyArray_NDIM(%(sm)s) != 2)
|| (PyArray_NDIM(%(y_idx)s) != 1))
......
......@@ -194,16 +194,20 @@ class T_CrossentropySoftmax1Hot(unittest.TestCase):
class T_CrossentropySoftmax1HotWithBiasDx(utt.InferShapeTester):
def test0(self):
def f(sm):
return (theano.tensor.nnet.crossentropy_softmax_1hot_with_bias_dx(
numpy.random.rand(10), # Gradient w.r.t. NLL.
sm, # Softmax output.
numpy.random.randint(low=0,
high=5, size=10))) # Class indices.
def ff(class_dtype):
def f(sm):
# Class indices
y = numpy.random.randint(low=0, high=5, size=10).astype(class_dtype)
return theano.tensor.nnet.crossentropy_softmax_1hot_with_bias_dx(
numpy.random.rand(10), # Gradient w.r.t. NLL.
sm, # Softmax output.
y)
return f
# Build a random softmax output whose rows sum to 1.
softmax_output = numpy.random.rand(10, 5)
softmax_output /= softmax_output.sum(axis=1).reshape(10, 1)
utt.verify_grad(f, [softmax_output])
for dtype in ['uint8', 'int8', 'uint64', 'int64']:
utt.verify_grad(ff(dtype), [softmax_output])
def test1(self):
rng = numpy.random.RandomState(utt.fetch_seed())
......@@ -243,11 +247,15 @@ class T_CrossentropySoftmaxArgmax1HotWithBias(utt.InferShapeTester):
n_samples = 3
# First test gradient when getting a gradient on the NLL output.
def grad_on_nll(x, b):
return self.op(x, b, y_idx=numpy.random.randint(
low=0, high=n_classes, size=n_samples))[0]
utt.verify_grad(grad_on_nll, [numpy.random.rand(n_samples, n_classes),
numpy.random.rand(n_classes)])
def grad_on_nll_dtype(dtype):
def grad_on_nll(x, b):
y_idx = numpy.random.randint(low=0, high=n_classes, size=n_samples).astype(dtype)
return self.op(x, b, y_idx=y_idx)[0]
return grad_on_nll
for dtype in ['uint8', 'int8', 'uint64', 'int64']:
utt.verify_grad(grad_on_nll_dtype(dtype),
[numpy.random.rand(n_samples, n_classes),
numpy.random.rand(n_classes)])
# Then test gradient when getting a gradient on the softmax output.
def grad_on_softmax(x, b):
......
Markdown is supported
0%
You are about to add 0 people to the discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment