提交 2345e188 authored 作者: Pascal Lamblin's avatar Pascal Lamblin

Merge pull request #1536 from nouiz/doc

Mixed small stuff
......@@ -35,7 +35,7 @@ script:
- df -h
- ulimit -a
- echo $PART
- theano-nose $PART
- theano-nose -v $PART
#after_script:
......
......@@ -197,18 +197,21 @@ List of Implemented Operations
- :class:`Dot <theano.sparse.basic.Dot>` and
:func:`dot <theano.sparse.basic.dot>`.
- One of the inputs must be sparse, the other sparse or dense.
- The grad implemented is regular.
- No C code for perform and no C code for grad.
- Return a dense for perform and a dense for grad.
- :class:`StructuredDot <theano.sparse.basic.StructuredDot>`
and :func:`structured_dot <theano.sparse.basic.structured_dot>`.
- The first input is sparse, the second can be sparse or dense.
- The grad implemented is structured.
- C code for perform and grad.
- Return a dense for perform and a sparse for grad.
- :class:`TrueDot <theano.sparse.basic.TrueDot>` and
:func:`true_dot <theano.sparse.basic.true_dot>`.
- The first input is sparse, the second can be sparse or dense.
- The grad implemented is regular.
- No C code for perform and no C code for grad.
- Return a Sparse for perform and a Sparse for grad.
......@@ -217,17 +220,22 @@ List of Implemented Operations
- :class:`SamplingDot <theano.sparse.basic.SamplingDot>` and
``sampling_dot``.
- Both inputs must be dense.
- The grad implemented is structured for `p`.
- Sample of the dot and sample of the gradient.
- C code for perform but not for grad.
- Return sparse for perform and grad.
- :class:`Usmm <theano.sparse.basic.Usmm>` and ``usmm``.
- You *shouldn't* insert this op yourself!
- There is an optimization that transforms a
:class:`Dot <theano.sparse.basic.Dot>` to ``Usmm`` when possible.
- This op is the equivalent of gemm for sparse dot.
- There is no grad implemented for this op.
- There is an optimization that transforms a
:class:`Dot <theano.sparse.basic.Dot>` to ``Usmm`` when possible.
You shouldn't need to insert it yourself.
- There is no grad implemented for this op, and none is needed since
you don't insert it yourself.
- One of the inputs must be sparse, the other sparse or dense.
- Return a dense for perform.
- Slice Operations
- sparse_variable[N, N], return a tensor scalar.
......
......@@ -116,6 +116,20 @@
The softmax function will, when applied to a matrix, compute the softmax values row-wise.
:note: this inserts a particular op. But this op doesn't yet
implement the Rop for Hessian-free optimization. If you want that,
use this equivalent code, which has the Rop implemented:
``exp(x)/exp(x).sum(1, keepdims=True)``. Theano should
optimize this by inserting the softmax op itself. The softmax
op is more numerically stable because it uses this code:
.. code-block:: python
e_x = exp(x - x.max(axis=1, keepdims=True))
out = e_x / e_x.sum(axis=1, keepdims=True)
Example of use:
.. code-block:: python
x,y,b = T.dvectors('x','y','b')
......
......@@ -64,6 +64,14 @@ void * device_malloc(size_t size)
void * device_malloc(size_t size, int verbose)
{
#if PRECHECK_ERROR
cudaThreadSynchronize();
cudaError_t prevError = cudaGetLastError();
if (cudaSuccess != prevError)
{
fprintf(stderr, "Error existed before calling device_malloc.\n");
}
#endif
void * rval=NULL;
cudaError_t err = cudaMalloc(&rval, size);
if (cudaSuccess != err)
......@@ -81,7 +89,7 @@ void * device_malloc(size_t size, int verbose)
cudaGetLastError();
fprintf(stderr,
"Error when tring to find the memory information"
" on the GPU\n");
" on the GPU: %s\n", cudaGetErrorString(err2));
}
#if COMPUTE_GPU_MEM_USED
fprintf(stderr,
......@@ -98,7 +106,8 @@ void * device_malloc(size_t size, int verbose)
#endif
}
PyErr_Format(PyExc_MemoryError,
"Error allocating %li bytes of device memory (%s).", (long)size, cudaGetErrorString(err));
"Error allocating %li bytes of device memory (%s).",
(long)size, cudaGetErrorString(err));
return NULL;
}
if (rval != NULL){
......@@ -109,14 +118,19 @@ void * device_malloc(size_t size, int verbose)
#if COMPUTE_GPU_MEM_USED
_allocated_size += size;
_max_allocated_size = std::max(_max_allocated_size, _allocated_size);
for(int i=0;i<TABLE_SIZE;i++){
int i = 0;
for(;i<TABLE_SIZE;i++){
if(NULL==_alloc_size_table[i].ptr){
_alloc_size_table[i].ptr=rval;
_alloc_size_table[i].size=size;
break;
}
}
if (i == TABLE_SIZE){
fprintf(stderr,
"When tracking GPU malloc, our table size wasn't big enough."
" So we loose some tracking. Raise the value of TABLE_SIZE in the file cuda_ndarra.cu");
}
#endif
}
//fprintf(stderr,
......@@ -129,23 +143,48 @@ void * device_malloc(size_t size, int verbose)
//printf("MEMSET\n");
}
#if PRINT_FREE_MALLOC
fprintf(stderr, "device malloc %p\n",rval);
fprintf(stderr, "device malloc %p of size %d\n", rval, size);
#endif
return rval;
}
int device_free(void *ptr)
{
#if PRINT_FREE_MALLOC
fprintf(stderr, "device_free %p\n",ptr);
#endif
#if PRECHECK_ERROR
cudaThreadSynchronize();
cudaError_t prevError = cudaGetLastError();
if (cudaSuccess != prevError)
{
fprintf(stderr, "Error existed before calling device_free.\n");
}
#endif
#if PRINT_FREE_MALLOC
size_t free = 0, total = 0;
cudaError_t err2 = cudaMemGetInfo(&free, &total);
if (err2 != cudaSuccess){
cudaGetLastError();
fprintf(stderr,
"Error when tring to find the memory information"
" on the GPU: %s\n", cudaGetErrorString(err2));
}
#if COMPUTE_GPU_MEM_USED
{
int i = 0;
for(;i<TABLE_SIZE;i++)
if(_alloc_size_table[i].ptr==ptr){
break;
}
assert(i<TABLE_SIZE);
fprintf(stderr, "device_free %p of size %d."
" Driver report %d bytes free and %d bytes total \n",
ptr, _alloc_size_table[i].size, free, total);
}
#else
fprintf(stderr, "device_free %p."
" Driver report %d bytes free and %d bytes total \n",
ptr, free, total);
#endif
#endif
// if there is no gpu context, the call to cudaFree will fail; skip it entirely
if(!g_gpu_context_active) {
......@@ -164,15 +203,34 @@ int device_free(void *ptr)
// it returns something else I still don't see why we should ignore
// it. All we want to do here is reset the flag.
cudaGetLastError();
size_t free = 0, total = 0;
cudaError_t err2 = cudaMemGetInfo(&free, &total);
if (err2 != cudaSuccess){
cudaGetLastError();
fprintf(stderr,
"Error when tring to find the memory information"
" on the GPU: %s\n", cudaGetErrorString(err2));
}
#if COMPUTE_GPU_MEM_USED
{
int i = 0;
for(;i<TABLE_SIZE;i++)
if(_alloc_size_table[i].ptr==ptr){
break;
}
assert(i<TABLE_SIZE);
fprintf(stderr,
"Error freeing device pointer %p (%s).%d byte already allocated\n",
ptr, cudaGetErrorString(err), _allocated_size);
"Error freeing device pointer %p (%s) of size %d. %d byte already allocated."
" Driver report %d bytes free and %d bytes total \n",
ptr, cudaGetErrorString(err),
_alloc_size_table[i].size, _allocated_size, free, total);
}
#else
fprintf(stderr,
"Error freeing device pointer %p (%s).\n",
"Error freeing device pointer %p (%s)."
" Driver report %d bytes free and %d bytes total \n",
ptr,
cudaGetErrorString(err));
cudaGetErrorString(err), free, total);
#endif
PyErr_Format(PyExc_MemoryError,
"error freeing device pointer %p (%s)",
......
......@@ -963,6 +963,7 @@ class Eigh(Eig):
_numop = staticmethod(numpy.linalg.eigh)
def __init__(self, UPLO='L'):
assert UPLO in ['L', 'U']
self.UPLO = UPLO
def __str__(self):
......@@ -1031,6 +1032,7 @@ class EighGrad(Op):
"""
def __init__(self, UPLO='L'):
assert UPLO in ['L', 'U']
self.UPLO = UPLO
if UPLO == 'L':
self.tri0 = numpy.tril
......
......@@ -360,11 +360,8 @@ class Softmax(gof.Op):
def perform(self, node, input_storage, output_storage):
x, = input_storage
sm = numpy.zeros_like(x)
for i in xrange(sm.shape[0]):
row = x[i]
sm[i] = numpy.exp(row - numpy.max(row))
sm[i] /= numpy.sum(sm[i])
e_x = numpy.exp(x - x.max(axis=1)[:, None])
sm = e_x / e_x.sum(axis=1)[:, None]
output_storage[0][0] = sm
def grad(self, inp, grads):
......
......@@ -8,9 +8,8 @@ from theano import config
from theano import tensor as T
from theano import tensor
from theano import gof
from theano.gof.python25 import all
from theano.tests import unittest_tools as utt
from theano import printing, pprint
from theano import printing
from theano.tensor.nnet import (categorical_crossentropy,
crossentropy_categorical_1hot,
crossentropy_softmax_1hot,
......@@ -1270,6 +1269,20 @@ class Test_softmax_opt:
assert softmax in f_ops
f(self.rng.rand(3, 4).astype(config.floatX))
def test_basic_keepdims(self):
c = T.matrix()
p_y = T.exp(c) / T.exp(c).sum(axis=1, keepdims=True)
# test that function contains softmax and no div.
f = theano.function([c], p_y, mode=self.mode)
f_ops = [n.op for n in f.maker.fgraph.toposort()]
#print '--- f ='
#printing.debugprint(f)
#print '==='
assert len(f_ops) == 1
assert softmax in f_ops
f(self.rng.rand(3, 4).astype(config.floatX))
def test_grad(self):
c = T.matrix()
p_y = T.exp(c) / T.exp(c).sum(axis=1).dimshuffle(0, 'x')
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论