提交 be0902fb authored 作者: Razvan Pascanu's avatar Razvan Pascanu

merge

To install the package, use:
To install the package, see this page:
python setup.py build
python setup.py test
python setup.py install
http://deeplearning.net/software/theano/install.html#install
For the documentation, see the project website:
http://pylearn.org/theano/
http://deeplearning.net/software/theano/
We recommend you look at the documentation on the website, since it
will be more current than the documentation included with the package.
......
......@@ -333,6 +333,13 @@ but this has not been tested yet.
cp libblas.dll /mingw/lib
mv libblas.dll /mingw/bin
- Edit (or create) your ``$HOME/.theanorc`` and add the following section:
.. code-block:: bash
[blas]
ldflags = -lblas
- Install `Mercurial <http://mercurial.selenic.com/downloads/>`__
(you can use the regular Windows release, you do not need TortoiseHg).
......
......@@ -936,7 +936,7 @@ class _Linker(gof.link.LocalLinker):
except (NotImplementedError, utils.MethodNotDefined):
thunks_c.append(None)
if self.maker.mode.check_py_code:
if self.maker.mode.check_py_code or thunks_c[-1] is None:
p = node.op.perform
thunk = (lambda p = p, i = node_input_storage, o = node_output_storage, n =
node: p(n, [x[0] for x in i], o))
......@@ -1455,7 +1455,7 @@ class DebugMode(Mode):
check_py_code = config.DebugMode.check_py
"""
Should we evaluate (and check) the `perform` implementations?
Should we evaluate (and check) the `perform` implementations? Always checked if no `c_code`.
"""
check_isfinite = config.DebugMode.check_finite
......
......@@ -33,7 +33,7 @@ run_cthunk(PyObject *self, PyObject *args)
return NULL;
}
void * ptr_addr = PyCObject_AsVoidPtr(py_cthunk);
int (*fn)(void*) = reinterpret_cast<int (*)(void*)>(ptr_addr);
int (*fn)(void*) = (int (*)(void*))(ptr_addr);
void* it = PyCObject_GetDesc(py_cthunk);
int failure = fn(it);
......
......@@ -12,10 +12,10 @@ import theano.tensor.signal.downsample as downsample
import numpy
raise SkipTest('SKIP TO MAKE THE BUILDBOT DON\'T CRASH. THEIR IS A DIFFICULT BUG TO FIX WITH MEMORY LEAK AND/OR WHEN Cuda_Ndarray alloc fail!')
# Skip test if cuda_ndarray is not available.
from nose.plugins.skip import SkipTest
raise SkipTest('SKIP TO PREVENT THE BUILDBOT FROM CRASHING. THERE IS A DIFFICULT BUG TO FIX WITH MEMORY LEAK AND/OR WHEN Cuda_Ndarray alloc fail!')
import theano.sandbox.cuda as cuda_ndarray
if cuda_ndarray.cuda_available == False:
raise SkipTest('Optional package cuda disabled')
......
......@@ -10,6 +10,7 @@ import numpy
from theano import Op, Apply, shared, config
from theano.tensor import raw_random, TensorType, as_tensor_variable, get_vector_length, cast, opt
from theano.tensor import zeros_like, sqrt, log, sin, cos, join
from theano.compile import optdb
from theano.gof import local_optimizer
......@@ -650,6 +651,49 @@ class MRG_RandomStreams(object):
else:
raise NotImplementedError("MRG_RandomStreams.binomial with n > 1")
def normal(self, size=None, avg=0.0, std=1.0, ndim=None, dtype=config.floatX):
    """Return a symbolic sample from N(avg, std**2) via Box-Muller.

    :param size: shape of the returned sample (typically a tuple);
        if truthy, the flat sample vector is reshaped to it.
    :param avg: mean of the distribution.
    :param std: standard deviation of the distribution.
    :param ndim: unused here; presumably kept for interface
        compatibility with other random streams -- TODO confirm.
    :param dtype: dtype of the uniform draws and of the result.
    """
    # We need an even number of ]0,1[ samples. Then we split them
    # in two halves. First half becomes our U1's for Box-Muller,
    # second half our U2's. See Wikipedia page:
    # http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform
    n_samples = self.n_streams(size)
    evened = False
    if n_samples % 2 == 1:
        # Pad to an even count; the extra sample is dropped below.
        n_samples += 1
        evened = True
    flattened = self.uniform(size=(n_samples,), dtype=dtype)

    # Explicit floor division: identical to `/` on Python 2 ints,
    # and still a valid integer index under Python 3 semantics.
    U1 = flattened[:n_samples // 2]
    U2 = flattened[n_samples // 2:]

    # TensorVariable does not support item assignment, so the two
    # Box-Muller halves are computed separately and joined.
    sqrt_ln_U1 = sqrt(-2.0 * log(U1))
    first_half = sqrt_ln_U1 * cos(2.0 * numpy.pi * U2)
    second_half = sqrt_ln_U1 * sin(2.0 * numpy.pi * U2)
    normal_samples = join(0, first_half, second_half)

    if evened:
        # Drop the padding sample added above to even the count.
        final_samples = normal_samples[:-1]
    else:
        final_samples = normal_samples

    final_samples = avg + std * final_samples
    if size:
        final_samples = final_samples.reshape(size)
    return final_samples
@local_optimizer([None])
def mrg_random_make_inplace(node):
op = node.op
......@@ -734,3 +778,78 @@ def test_rng0():
basictest(ff, 1000, prefix='numpy')
def test_normal0():
    """Check mean/std of MRG normal samples on CPU, GPU, and against numpy streams.

    Python 2 test driver: draws `steps` batches from each generator and
    verifies the running mean and average per-batch std against targets.
    """
    def basictest(f, steps, target_avg, target_std, prefix=""):
        # NOTE(review): reads `sample_size` from the enclosing scope at
        # call time (late binding), so it uses whatever was assigned last.
        dt = 0.0
        avg_std = 0.0
        for i in xrange(steps):
            t0 = time.time()
            ival = f()
            dt += time.time() - t0
            ival = numpy.asarray(ival)
            if i == 0:
                mean = numpy.array(ival, copy=True)
                avg_std = numpy.std(ival)
            else:
                # Incremental (running) average of the sample mean and std.
                alpha = 1.0 / (1+i)
                mean = alpha * ival + (1-alpha)*mean
                avg_std = alpha * numpy.std(ival) + (1-alpha)*avg_std
        print prefix, 'mean', numpy.mean(mean)
        assert abs(numpy.mean(mean) - target_avg) < .01, 'bad mean?'
        print prefix, 'std', avg_std
        assert abs(avg_std - target_std) < .01, 'bad std?'
        print prefix, 'time', dt
        print prefix, 'elements', steps*sample_size[0]*sample_size[1]
        print prefix, 'samples/sec', steps*sample_size[0]*sample_size[1] / dt
    # Even total number of samples: exercises the no-padding path of normal().
    sample_size = (999,100)
    print ''
    print 'ON CPU:'
    R = MRG_RandomStreams(234, use_cuda=False)
    n = R.normal(size=sample_size, avg=-5.0, std=2.0)
    f = theano.function([], n)
    theano.printing.debugprint(f)
    print 'random?[:10]\n', f()[0,0:10]
    basictest(f, 50, -5.0, 2.0, prefix='mrg ')
    sys.stdout.flush()
    # now with odd number of samples
    sample_size = (999,99)
    print ''
    print 'ON GPU:'
    R = MRG_RandomStreams(234, use_cuda=True)
    n = R.normal(size=sample_size, avg=-5.0, std=2.0, dtype='float32')
    assert n.dtype == 'float32' #well, it's really that this test w GPU doesn't make sense otw
    f = theano.function([], theano.Out(
        theano.sandbox.cuda.basic_ops.gpu_from_host(n),
        borrow=True))
    theano.printing.debugprint(f)
    print 'random?[:10]\n', numpy.asarray(f())[0,0:10]
    basictest(f, 50, -5.0, 2.0, prefix='gpu mrg ')
    sys.stdout.flush()
    print ''
    print 'ON CPU w NUMPY:'
    RR = theano.tensor.shared_randomstreams.RandomStreams(234)
    nn = RR.normal(size=sample_size, avg=-5.0, std=2.0)
    ff = theano.function([], nn)
    basictest(ff, 50, -5.0, 2.0, prefix='numpy ')
#if __name__ == '__main__':
# # with: export THEANO_FLAGS=device=gpu0,floatX=float32
# test_normal0()
......@@ -1414,12 +1414,16 @@ class Composite(ScalarOp):
name = "V%%(id)s_tmp%i" % i
subd[output] = name
_c_code += "%s %s;\n" % (output.type.dtype_specs()[1], name)
_c_code += node.op.c_code(node,
s = node.op.c_code(node,
"%(name)s",
[subd[input] for input in node.inputs],
[subd[output] for output in node.outputs],
dict(fail = "%(fail)s",
id = "%%(id)s_%i" % j))
if any([isinstance(x.op,Mod) for x in env.toposort()]):
s = s.replace('% ','%% ')
_c_code += s
_c_code += "\n"
_c_code += "}\n"
......@@ -1481,6 +1485,9 @@ class Composite(ScalarOp):
return self._c_code % d
def c_code_cache_version(self):
    # Cache key: our own version first, then the version tuple of every
    # op in the composite graph, so changing any sub-op's generated C
    # code invalidates the compiled-module cache.
    sub_versions = [node.op.c_code_cache_version()
                    for node in self.env.toposort()]
    return (1,) + tuple(sub_versions)
def __eq__(self, other):
if self is other: return True
if not isinstance(other, self.__class__): return False
......
......@@ -100,18 +100,24 @@ class GemmRelated(Op):
#ifndef MOD
#define MOD %
#endif
static double time_time() // a time function like time.time()
{
struct timeval tv;
gettimeofday(&tv, 0);
return (double) tv.tv_sec + (double) tv.tv_usec / 1000000.0;
}
"""
return blas_header_text() + mod_str
def c_headers(self):
# std.cout doesn't require the '%' symbol to print stuff...
# so it works much better with python's string-substitution stuff.
return ['<iostream>']
return ['<iostream>', '<time.h>', '<sys/time.h>']
def c_libraries(self):
    # Libraries to link the generated C code against. Delegates to the
    # module-level ldflags() helper -- presumably the configured BLAS
    # link flags; confirm against the blas module's definition.
    return ldflags()
def c_code_cache_version(self):
return (0,0,1)
# code_cache_version is built by subclasses from
# build_gemm_version
def c_compile_args(self):
    # Extra compiler arguments: ask ldflags() for the flag entries only
    # (libs=False, flags=True), since the library names are already
    # supplied separately by c_libraries().
    return ldflags(libs=False, flags=True)
......@@ -247,6 +253,7 @@ class GemmRelated(Op):
char T = 'T';
int Nz0 = Nz[0], Nz1 = Nz[1], Nx1 = Nx[1];
//std::cerr << (unit/256) MOD 16 << (unit / 16) MOD 16 << unit MOD 16<< '\\n';
//double t0 = time_time();
switch(unit)
{
case 0x000: sgemm_(&N, &N, &Nz1, &Nz0, &Nx1, &a, y, &sy_0, x, &sx_0, &b, z, &sz_0); break;
......@@ -259,6 +266,7 @@ class GemmRelated(Op):
case 0x111: sgemm_(&N, &N, &Nz0, &Nz1, &Nx1, &a, x, &sx_1, y, &sy_1, &b, z, &sz_1); break;
default: PyErr_SetString(PyExc_ValueError, "some matrix has no unit stride"); %(fail)s;
};
//fprintf(stderr, "Calling sgemm %%i %%i %%i %%i took %%f\\n", unit, Nz1, Nz0, Nx1, time_time() - t0);
"""
case_double = """
......@@ -278,6 +286,7 @@ class GemmRelated(Op):
char T = 'T';
int Nz0 = Nz[0], Nz1 = Nz[1], Nx1 = Nx[1];
//std::cerr << (unit/256) MOD 16 << (unit / 16) MOD 16 << unit MOD 16<< '\\n';
//double t0 = time_time();
switch(unit)
{
case 0x000: dgemm_(&N, &N, &Nz1, &Nz0, &Nx1, &a, y, &sy_0, x, &sx_0, &b, z, &sz_0); break;
......@@ -290,6 +299,7 @@ class GemmRelated(Op):
case 0x111: dgemm_(&N, &N, &Nz0, &Nz1, &Nx1, &a, x, &sx_1, y, &sy_1, &b, z, &sz_1); break;
default: PyErr_SetString(PyExc_ValueError, "some matrix has no unit stride"); %(fail)s;
};
//fprintf(stderr, "Calling dgemm %%i %%i %%i %%i took %%f\\n", unit, Nz1, Nz0, Nx1, time_time()- t0);
"""
end_switch_typenum = """
......@@ -319,7 +329,7 @@ class GemmRelated(Op):
self.end_switch_typenum), '')
def build_gemm_version(self):
return (2,)
return (4,)
class Gemm(GemmRelated):
"""In-place version of matrix-matrix multiplication (with accumulation):
......@@ -442,6 +452,7 @@ class Gemm(GemmRelated):
dims[0] = %(_z)s->dimensions[0];
dims[1] = %(_z)s->dimensions[1];
%(_zout)s = (PyArrayObject*)PyArray_SimpleNew(2, dims, type_num_%(_z)s);
//fprintf(stderr, "Gemm Allocating %%i %%i\\n", dims[0], dims[1]);
if(!%(_zout)s) {
PyErr_SetString(PyExc_MemoryError, "failed to alloc gemm_no_inplace output");
%(fail)s
......@@ -515,7 +526,11 @@ class Gemm(GemmRelated):
return full_code
def c_code_cache_version(self):
return (3,) + self.build_gemm_version()
gv = self.build_gemm_version()
if gv:
return (3,) + gv
else:
return gv
gemm_inplace = Gemm(inplace=True)
gemm_no_inplace = Gemm(inplace=False)
......@@ -817,6 +832,7 @@ class Dot22(GemmRelated):
dims[0] = %(_x)s->dimensions[0];
dims[1] = %(_y)s->dimensions[1];
%(_zout)s = (PyArrayObject*)PyArray_SimpleNew(2, dims, type_num_%(_x)s);
//fprintf(stderr, "Dot Allocating %%i %%i\\n", dims[0], dims[1]);
if(!%(_zout)s) {
PyErr_SetString(PyExc_MemoryError, "failed to alloc dot22 output");
%(fail)s
......@@ -841,7 +857,11 @@ class Dot22(GemmRelated):
full_code = self.build_gemm_call() % dict(locals(), **sub)
return full_code
def c_code_cache_version(self):
return (1,) + self.build_gemm_version()
gv = self.build_gemm_version()
if gv:
return (1,) + gv
else:
return gv
_dot22 = Dot22()
......@@ -947,7 +967,11 @@ class Dot22Scalar(GemmRelated):
full_code = self.build_gemm_call() % dict(locals(), **sub)
return full_code
def c_code_cache_version(self):
return (2,) + self.build_gemm_version()
gv = self.build_gemm_version()
if gv:
return (2,) + gv
else:
return gv
_dot22scalar = Dot22Scalar()
......
"""
Contains an op for convolving input images with a set of filters. This was
Contains an Op for convolving input images with a set of filters. This was
developed especially for Convolutional Neural Networks.
For related ops, including downsampling and subsampling, see
tensor.signal and tensor.signal.downsample.
See especially conv2d().
"""
__docformat__ = "restructuredtext en"
......@@ -764,7 +769,6 @@ using namespace std;
d["self_dx"]=self.dx
d["self_dy"]=self.dy
d["mode"]=self.out_mode.upper()
d["mode"]=self.out_mode.upper()
d["affectation"]="="
if all_shape:
d["self_bsize"]=self.bsize
......@@ -910,7 +914,7 @@ if(%(filtersflipped)s->nd==3){
kerns_dim[1]=%(filtersflipped)s->dimensions[1];
kerns_dim[0]=%(filtersflipped)s->dimensions[0];
}else{
std:stringstream temp;
std::stringstream temp;
temp << "nddim="<<%(filtersflipped)s->nd;
std::string param = temp.str();
PyErr_SetString(PyExc_ValueError,
......@@ -1145,7 +1149,7 @@ if(%(filtersflipped)s->nd==3){
kerns_dim[1]=%(filtersflipped)s->dimensions[1];
kerns_dim[0]=%(filtersflipped)s->dimensions[0];
}else{
std:stringstream temp;
std::stringstream temp;
temp << "nddim="<<%(filtersflipped)s->nd;
std::string param = temp.str();
PyErr_SetString(PyExc_ValueError,
......@@ -1377,7 +1381,7 @@ if(%(img2d)s->nd==2){
img2d_dim[1]=%(img2d)s->dimensions[1];
img2d_dim[0]=%(img2d)s->dimensions[0];
}else {
std:stringstream temp;
std::stringstream temp;
temp << "nddim="<<%(img2d)s->nd;
std::string param = temp.str();
PyErr_SetString(PyExc_ValueError,
......
......@@ -143,16 +143,18 @@ def speed_multilayer_conv():
validate=False# we don't validate the result to have it much faster!
verbose=1
unroll_batch = [1,2,4,5,10,20]
unroll_kern = [1,2,4,5,10,20]
unroll_batch = [1,4,5]
unroll_kern = [1,4,5]
unroll_batch = [1,2,3,4,5,10]#15, 30, 60 always much slower
unroll_kern = [1,2,3,4,5,10]#15, 30, 60 always much slower
#unroll_batch = [1,4,5]
#unroll_kern = [1,4,5]
#unroll_batch = [1,4]
#unroll_kern = [1,4]
unroll_patch = [True, False]
bsize = 20 # batch size
bsize = 60 # batch size
imshp_start = (1,48,48)# a non-square shape to test more corner cases.
kshps = ([11,12],[12,11])# non-square shapes to test more corner cases.
nkerns = [20,20] # per output pixel
nkerns = [60,60] # per output pixel
ssizes = [(1,1),]#(1,1)]#(2,2) bugged
convmodes = ['valid','full']
do_convolve2=False
......@@ -212,8 +214,10 @@ def speed_multilayer_conv():
best=N.asarray(best)
worst=N.asarray(worst)
print "timing for unrolled version"
print t_b_k
print t
print "unroll_batch/unroll_kern valid_mode full_mode"
for n_b in range(len(unroll_batch)):
for n_k in range(len(unroll_kern)):
print unroll_batch[n_b],"/",unroll_kern[n_k], " ",t[n_b,n_k]
t_detail=t
t = t.sum(axis=2)
print "max %.3fs"%t.max(), "max param(batch unloop size/kernel unloop size)", t_b_k[t.argmax()]
......
......@@ -88,10 +88,10 @@ class TestConv2D(unittest.TestCase):
Tests that basic convolutions work for odd and even dimensions of image and filter
shapes, as well as rectangular images and filters.
"""
self.validate((3,2,8,8), (4,2,5,5), 'valid')
self.validate((3,2,8,8), (4,2,5,5), 'valid', verify_grad=False)
self.validate((3,2,7,5), (5,2,2,3), 'valid')
self.validate((3,2,7,5), (5,2,3,2), 'valid')
self.validate((3,2,8,8), (4,2,5,5), 'full')
self.validate((3,2,7,5), (5,2,3,2), 'valid', verify_grad=False)
self.validate((3,2,8,8), (4,2,5,5), 'full', verify_grad=False)
self.validate((3,2,7,5), (5,2,2,3), 'full')
# test filter same size as input
......@@ -105,7 +105,7 @@ class TestConv2D(unittest.TestCase):
"""
self.validate((3,2,7,5), (5,2,2,3), 'valid', unroll_patch=False)
self.validate((3,2,7,5), (5,2,2,3), 'full', unroll_patch=False)
self.validate((3,2,3,3), (4,2,3,3), 'valid', unroll_patch=False)
self.validate((3,2,3,3), (4,2,3,3), 'valid', unroll_patch=False, verify_grad=False)
def test_unroll_special(self):
"""
......@@ -175,7 +175,17 @@ class TestConv2D(unittest.TestCase):
"""
try:
self.validate((3,2,8,8), (4,2,5,5), 'valid', input = T.dmatrix())
# should never reach here
self.fail()
except:
pass
try:
self.validate((3,2,8,8), (4,2,5,5), 'valid', filters = T.dvector())
# should never reach here
self.fail()
except:
pass
try:
self.validate((3,2,8,8), (4,2,5,5), 'valid', input = T.dtensor3())
# should never reach here
self.fail()
......
......@@ -224,7 +224,12 @@ class MakeVector(T.Op):
def __str__(self):
    # Ops of this kind are displayed simply by their class name
    # (e.g. "MakeVector").
    class_name = self.__class__.__name__
    return class_name
def perform(self, node, inputs, (out,)):
out[0] = theano._asarray(inputs, dtype=node.outputs[0].dtype)
# not calling theano._asarray as optimization
if out[0] is None:
out[0] = theano._asarray(inputs, dtype=node.outputs[0].dtype)
else:
# assume that out has correct dtype. there is no cheap way to check
out[0][...] = inputs
make_vector = MakeVector()
......@@ -262,7 +267,10 @@ class Shape_i(T.Op):
raise TypeError('x has too few dimensions for Shape_i', (x, self.i))
return T.Apply(self, [x], [T.lscalar()])
def perform(self, node, (x, ), (out, )):
out[0] = theano._asarray(x.shape[self.i], dtype = 'int64')
if out[0] is None:
out[0] = theano._asarray(x.shape[self.i], dtype='int64')
else:
out[0][...] = x.shape[self.i]
def grad(self, (x,), (gz,)):
return [None]
......
......@@ -603,8 +603,12 @@ def test_dot22scalar():
#currently the canonizer doesn't always merge all Muls together...
#that forces the optimizer to make a recursive search, which it doesn't do now.
#but it does do it for 1 level of recursion.
# assert _dot22scalar in [x.op for x in topo]
# assert len(topo)==2
# assert _dot22scalar in [x.op for x in topo]
# assert len(topo)==2
### Fred,
### What are you talking about?
### -James (March 28 2010)
f(av,bv,cv)
f = theano.function([a,b,c],c * a*0.2*T.dot(a,b),mode=m2)
topo = f.maker.env.toposort()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论