Commit 3a2e9988, authored by lamblin

Merge pull request #1116 from nouiz/paper

Tech report and uint*
...@@ -6,7 +6,7 @@ Theano is a Python library that allows you to define, optimize, and ...@@ -6,7 +6,7 @@ Theano is a Python library that allows you to define, optimize, and
evaluate mathematical expressions involving multi-dimensional evaluate mathematical expressions involving multi-dimensional
arrays efficiently. Theano features: arrays efficiently. Theano features:
* **tight integration with numpy** -- Use `numpy.ndarray` in Theano-compiled functions. * **tight integration with NumPy** -- Use `numpy.ndarray` in Theano-compiled functions.
* **transparent use of a GPU** -- Perform data-intensive calculations up to 140x faster than with CPU.(float32 only) * **transparent use of a GPU** -- Perform data-intensive calculations up to 140x faster than with CPU.(float32 only)
* **efficient symbolic differentiation** -- Theano does your derivatives for function with one or many inputs. * **efficient symbolic differentiation** -- Theano does your derivatives for function with one or many inputs.
* **speed and stability optimizations** -- Get the right answer for ``log(1+x)`` even when ``x`` is really tiny. * **speed and stability optimizations** -- Get the right answer for ``log(1+x)`` even when ``x`` is really tiny.
...@@ -17,12 +17,19 @@ Theano has been powering large-scale computationally intensive scientific invest ...@@ -17,12 +17,19 @@ Theano has been powering large-scale computationally intensive scientific invest
since 2007. But it is also approachable enough to be used in the classroom since 2007. But it is also approachable enough to be used in the classroom
(IFT6266 at the University of Montreal). (IFT6266 at the University of Montreal).
News
====
* New technical report on Theano: `Theano: new features and speed improvements <http://arxiv.org/abs/1211.5590>`_. Please cite the other paper below.
* Theano 0.6rc2 was released. Everybody is encouraged to update.
* `HPCS 2011 Tutorial <http://www.iro.umontreal.ca/~lisa/pointeurs/tutorial_hpcs2011_fixed.pdf>`_. I included a few fixes discovered while doing the Tutorial.
.. image:: images/talk2010.png .. image:: images/talk2010.png
:scale: 75% :scale: 75%
:align: left :align: left
**NEW!** `HPCS 2011 Tutorial <http://www.iro.umontreal.ca/~lisa/pointeurs/tutorial_hpcs2011_fixed.pdf>`_. I included a few fix discovered while doing the Tutorial.
You can watch a quick (20 minute) introduction to Theano given as a talk at `SciPy 2010 <http://conference.scipy.org/scipy2010/>`_ via streaming (or downloaded) video: You can watch a quick (20 minute) introduction to Theano given as a talk at `SciPy 2010 <http://conference.scipy.org/scipy2010/>`_ via streaming (or downloaded) video:
`Transparent GPU Computing With Theano`_. `Transparent GPU Computing With Theano`_.
......
...@@ -166,8 +166,11 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None, ...@@ -166,8 +166,11 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None,
if isinstance(updates, dict) and \ if isinstance(updates, dict) and \
not isinstance(updates, gof.python25.OrderedDict): not isinstance(updates, gof.python25.OrderedDict):
warnings.warn("Expected OrderedDict, got "+str(type(updates))+ "Using " warnings.warn(
"a standard dictionary here results in " "The parameter 'updates' of theano.function()"
" expect an OrderedDict,"
" got " + str(type(updates)) + "Using "
"a standard dictionary here results in "
"non-deterministic behavior. You should use an OrderedDict" "non-deterministic behavior. You should use an OrderedDict"
" if you are using python2.7 or use a list of (shared, update)" " if you are using python2.7 or use a list of (shared, update)"
" pairs. Do not just convert your dictionary to this type before" " pairs. Do not just convert your dictionary to this type before"
...@@ -176,7 +179,8 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None, ...@@ -176,7 +179,8 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None,
if givens is None: if givens is None:
givens = [] givens = []
if not isinstance(inputs, (list, tuple)): if not isinstance(inputs, (list, tuple)):
raise Exception("Inputs variable of a Theano function should be contained in a list, even when there is a single input.") raise Exception("Inputs variable of a Theano function should be"
" contained in a list, even when there is a single input.")
# compute some features of the arguments: # compute some features of the arguments:
uses_In = any([isinstance(i, In) for i in inputs]) # N.B. the square brackets are ncessary uses_In = any([isinstance(i, In) for i in inputs]) # N.B. the square brackets are ncessary
......
...@@ -54,10 +54,10 @@ class SoftmaxWithBias(gof.Op): ...@@ -54,10 +54,10 @@ class SoftmaxWithBias(gof.Op):
x = tensor.as_tensor_variable(x) x = tensor.as_tensor_variable(x)
b = tensor.as_tensor_variable(b) b = tensor.as_tensor_variable(b)
if x.type.ndim != 2 \ if x.type.ndim != 2 \
or x.type.dtype not in ['float32', 'float64']: or x.type.dtype not in tensor.float_dtypes:
raise ValueError('x must be 2-d tensor of floats') raise ValueError('x must be 2-d tensor of floats')
if b.type.ndim != 1 \ if b.type.ndim != 1 \
or x.type.dtype not in ['float32', 'float64']: or x.type.dtype not in tensor.float_dtypes:
raise ValueError('b must be 1-d tensor of floats') raise ValueError('b must be 1-d tensor of floats')
sm = x.type.make_variable() sm = x.type.make_variable()
...@@ -351,7 +351,7 @@ class Softmax(gof.Op): ...@@ -351,7 +351,7 @@ class Softmax(gof.Op):
def make_node(self, x): def make_node(self, x):
x = tensor.as_tensor_variable(x) x = tensor.as_tensor_variable(x)
if x.type.ndim not in (1, 2) \ if x.type.ndim not in (1, 2) \
or x.type.dtype not in ['float32', 'float64']: or x.type.dtype not in tensor.float_dtypes:
raise ValueError('x must be 1-d or 2-d tensor of floats') raise ValueError('x must be 1-d or 2-d tensor of floats')
if x.ndim == 1: if x.ndim == 1:
x = tensor.shape_padleft(x, n_ones=1) x = tensor.shape_padleft(x, n_ones=1)
...@@ -746,14 +746,14 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op): ...@@ -746,14 +746,14 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
b = tensor.as_tensor_variable(b) b = tensor.as_tensor_variable(b)
y_idx = tensor.as_tensor_variable(y_idx) y_idx = tensor.as_tensor_variable(y_idx)
if x.type.ndim != 2 \ if x.type.ndim != 2 \
or x.type.dtype not in ['float32', 'float64']: or x.type.dtype not in tensor.float_dtypes:
raise ValueError('x must be 2-d tensor of floats', x.type) raise ValueError('x must be 2-d tensor of floats', x.type)
if b.type.ndim != 1 \ if b.type.ndim != 1 \
or x.type.dtype not in ['float32', 'float64']: or x.type.dtype not in tensor.float_dtypes:
raise ValueError('b must be 1-d tensor of floats', b.type) raise ValueError('b must be 1-d tensor of floats', b.type)
if y_idx.type.ndim != 1 \ if y_idx.type.ndim != 1 \
or y_idx.type.dtype not in ['int8', 'int16', 'int32', 'int64']: or y_idx.type.dtype not in tensor.discrete_dtypes:
raise ValueError('y_idx must be 1-d tensor of ints', y_idx.type) raise ValueError('y_idx must be 1-d tensor of [u]ints', y_idx.type)
# TODO: Is this correct? It used to be y, not y_idx # TODO: Is this correct? It used to be y, not y_idx
nll = tensor.TensorType(x.type.dtype, nll = tensor.TensorType(x.type.dtype,
...@@ -884,15 +884,6 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op): ...@@ -884,15 +884,6 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
PyErr_SetString(PyExc_ValueError, "y_idx not 1d tensor"); PyErr_SetString(PyExc_ValueError, "y_idx not 1d tensor");
%(fail)s; %(fail)s;
} }
if ((PyArray_DESCR(%(y_idx)s)->type_num != NPY_INT64)
&& (PyArray_DESCR(%(y_idx)s)->type_num != NPY_INT32)
&& (PyArray_DESCR(%(y_idx)s)->type_num != NPY_INT16)
&& (PyArray_DESCR(%(y_idx)s)->type_num != NPY_INT8))
{
PyErr_SetString(PyExc_TypeError,
"y_idx not int8, int16, int32, or int64");
%(fail)s;
}
if (PyArray_DIMS(%(x)s)[0] != PyArray_DIMS(%(y_idx)s)[0]) if (PyArray_DIMS(%(x)s)[0] != PyArray_DIMS(%(y_idx)s)[0])
{ {
PyErr_Format(PyExc_ValueError, PyErr_Format(PyExc_ValueError,
...@@ -982,6 +973,15 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op): ...@@ -982,6 +973,15 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
dy = tensor.as_tensor_variable(dy) dy = tensor.as_tensor_variable(dy)
sm = tensor.as_tensor_variable(sm) sm = tensor.as_tensor_variable(sm)
y_idx = tensor.as_tensor_variable(y_idx) y_idx = tensor.as_tensor_variable(y_idx)
if (dy.type.ndim != 1 or
dy.type.dtype not in tensor.float_dtypes):
raise ValueError('dy must be 1-d tensor of floats', dy.type)
if (sm.type.ndim != 2 or
sm.type.dtype not in tensor.float_dtypes):
raise ValueError('sm must be 2-d tensor of floats', sm.type)
if (y_idx.type.ndim != 1 or
y_idx.type.dtype not in tensor.discrete_dtypes):
raise ValueError('y_idx must be 1-d tensor of [u]ints', y_idx.type)
return Apply(self, [dy, sm, y_idx], [sm.type.make_variable()]) return Apply(self, [dy, sm, y_idx], [sm.type.make_variable()])
def perform(self, node, input_storage, output_storage): def perform(self, node, input_storage, output_storage):
...@@ -1012,7 +1012,7 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op): ...@@ -1012,7 +1012,7 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
return [g_dy, g_sm, g_y_idx] return [g_dy, g_sm, g_y_idx]
def c_code_cache_version(self): def c_code_cache_version(self):
return (2,) return (3,)
def c_code(self, node, name, inp, out, sub): def c_code(self, node, name, inp, out, sub):
dnll, sm, y_idx = inp dnll, sm, y_idx = inp
...@@ -1034,15 +1034,6 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op): ...@@ -1034,15 +1034,6 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
"sm type should be float32 or float64"); "sm type should be float32 or float64");
%(fail)s; %(fail)s;
} }
if ((PyArray_DESCR(%(y_idx)s)->type_num != NPY_INT64)
&& (PyArray_DESCR(%(y_idx)s)->type_num != NPY_INT32)
&& (PyArray_DESCR(%(y_idx)s)->type_num != NPY_INT16)
&& (PyArray_DESCR(%(y_idx)s)->type_num != NPY_INT8))
{
PyErr_SetString(PyExc_TypeError,
"y_idx not int8, int16, int32, or int64");
%(fail)s;
}
if ((PyArray_NDIM(%(dnll)s) != 1) if ((PyArray_NDIM(%(dnll)s) != 1)
|| (PyArray_NDIM(%(sm)s) != 2) || (PyArray_NDIM(%(sm)s) != 2)
|| (PyArray_NDIM(%(y_idx)s) != 1)) || (PyArray_NDIM(%(y_idx)s) != 1))
......
...@@ -194,16 +194,20 @@ class T_CrossentropySoftmax1Hot(unittest.TestCase): ...@@ -194,16 +194,20 @@ class T_CrossentropySoftmax1Hot(unittest.TestCase):
class T_CrossentropySoftmax1HotWithBiasDx(utt.InferShapeTester): class T_CrossentropySoftmax1HotWithBiasDx(utt.InferShapeTester):
def test0(self): def test0(self):
def f(sm): def ff(class_dtype):
return (theano.tensor.nnet.crossentropy_softmax_1hot_with_bias_dx( def f(sm):
numpy.random.rand(10), # Gradient w.r.t. NLL. # Class indices
sm, # Softmax output. y = numpy.random.randint(low=0, high=5, size=10).astype(class_dtype)
numpy.random.randint(low=0, return theano.tensor.nnet.crossentropy_softmax_1hot_with_bias_dx(
high=5, size=10))) # Class indices. numpy.random.rand(10), # Gradient w.r.t. NLL.
sm, # Softmax output.
y)
return f
# Build a random softmax output whose rows sum to 1. # Build a random softmax output whose rows sum to 1.
softmax_output = numpy.random.rand(10, 5) softmax_output = numpy.random.rand(10, 5)
softmax_output /= softmax_output.sum(axis=1).reshape(10, 1) softmax_output /= softmax_output.sum(axis=1).reshape(10, 1)
utt.verify_grad(f, [softmax_output]) for dtype in ['uint8', 'int8', 'uint64', 'int64']:
utt.verify_grad(ff(dtype), [softmax_output])
def test1(self): def test1(self):
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
...@@ -243,11 +247,15 @@ class T_CrossentropySoftmaxArgmax1HotWithBias(utt.InferShapeTester): ...@@ -243,11 +247,15 @@ class T_CrossentropySoftmaxArgmax1HotWithBias(utt.InferShapeTester):
n_samples = 3 n_samples = 3
# First test gradient when getting a gradient on the NLL output. # First test gradient when getting a gradient on the NLL output.
def grad_on_nll(x, b): def grad_on_nll_dtype(dtype):
return self.op(x, b, y_idx=numpy.random.randint( def grad_on_nll(x, b):
low=0, high=n_classes, size=n_samples))[0] y_idx = numpy.random.randint(low=0, high=n_classes, size=n_samples).astype(dtype)
utt.verify_grad(grad_on_nll, [numpy.random.rand(n_samples, n_classes), return self.op(x, b, y_idx=y_idx)[0]
numpy.random.rand(n_classes)]) return grad_on_nll
for dtype in ['uint8', 'int8', 'uint64', 'int64']:
utt.verify_grad(grad_on_nll_dtype(dtype),
[numpy.random.rand(n_samples, n_classes),
numpy.random.rand(n_classes)])
# Then test gradient when getting a gradient on the softmax output. # Then test gradient when getting a gradient on the softmax output.
def grad_on_softmax(x, b): def grad_on_softmax(x, b):
......
Markdown formatting is supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment