提交 be0902fb authored 作者: Razvan Pascanu's avatar Razvan Pascanu

merge

To install the package, use: To install the package, see this page:
python setup.py build http://deeplearning.net/software/theano/install.html#install
python setup.py test
python setup.py install
For the documentation, see the project website: For the documentation, see the project website:
http://pylearn.org/theano/ http://deeplearning.net/software/theano/
We recommend you look at the documentation on the website, since it We recommend you look at the documentation on the website, since it
will be more current than the documentation included with the package. will be more current than the documentation included with the package.
......
...@@ -333,6 +333,13 @@ but this has not been tested yet. ...@@ -333,6 +333,13 @@ but this has not been tested yet.
cp libblas.dll /mingw/lib cp libblas.dll /mingw/lib
mv libblas.dll /mingw/bin mv libblas.dll /mingw/bin
- Edit (or create) your ``$HOME/.theanorc`` and add the following section:
.. code-block:: bash
[blas]
ldflags = -lblas
- Install `Mercurial <http://mercurial.selenic.com/downloads/>`__ - Install `Mercurial <http://mercurial.selenic.com/downloads/>`__
(you can use the regular Windows release, you do not need TortoiseHg). (you can use the regular Windows release, you do not need TortoiseHg).
......
...@@ -936,7 +936,7 @@ class _Linker(gof.link.LocalLinker): ...@@ -936,7 +936,7 @@ class _Linker(gof.link.LocalLinker):
except (NotImplementedError, utils.MethodNotDefined): except (NotImplementedError, utils.MethodNotDefined):
thunks_c.append(None) thunks_c.append(None)
if self.maker.mode.check_py_code: if self.maker.mode.check_py_code or thunks_c[-1] is None:
p = node.op.perform p = node.op.perform
thunk = (lambda p = p, i = node_input_storage, o = node_output_storage, n = thunk = (lambda p = p, i = node_input_storage, o = node_output_storage, n =
node: p(n, [x[0] for x in i], o)) node: p(n, [x[0] for x in i], o))
...@@ -1455,7 +1455,7 @@ class DebugMode(Mode): ...@@ -1455,7 +1455,7 @@ class DebugMode(Mode):
check_py_code = config.DebugMode.check_py check_py_code = config.DebugMode.check_py
""" """
Should we evaluate (and check) the `perform` implementations? Should we evaluate (and check) the `perform` implementations? Always checked if no `c_code`.
""" """
check_isfinite = config.DebugMode.check_finite check_isfinite = config.DebugMode.check_finite
......
...@@ -33,7 +33,7 @@ run_cthunk(PyObject *self, PyObject *args) ...@@ -33,7 +33,7 @@ run_cthunk(PyObject *self, PyObject *args)
return NULL; return NULL;
} }
void * ptr_addr = PyCObject_AsVoidPtr(py_cthunk); void * ptr_addr = PyCObject_AsVoidPtr(py_cthunk);
int (*fn)(void*) = reinterpret_cast<int (*)(void*)>(ptr_addr); int (*fn)(void*) = (int (*)(void*))(ptr_addr);
void* it = PyCObject_GetDesc(py_cthunk); void* it = PyCObject_GetDesc(py_cthunk);
int failure = fn(it); int failure = fn(it);
......
...@@ -12,10 +12,10 @@ import theano.tensor.signal.downsample as downsample ...@@ -12,10 +12,10 @@ import theano.tensor.signal.downsample as downsample
import numpy import numpy
raise SkipTest('SKIP TO MAKE THE BUILDBOT DON\'T CRASH. THEIR IS A DIFFICULT BUG TO FIX WITH MEMORY LEAK AND/OR WHEN Cuda_Ndarray alloc fail!')
# Skip test if cuda_ndarray is not available. # Skip test if cuda_ndarray is not available.
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
raise SkipTest('SKIP TO PREVENT THE BUILDBOT FROM CRASHING. THERE IS A DIFFICULT BUG TO FIX WITH MEMORY LEAK AND/OR WHEN Cuda_Ndarray alloc fail!')
import theano.sandbox.cuda as cuda_ndarray import theano.sandbox.cuda as cuda_ndarray
if cuda_ndarray.cuda_available == False: if cuda_ndarray.cuda_available == False:
raise SkipTest('Optional package cuda disabled') raise SkipTest('Optional package cuda disabled')
......
...@@ -10,6 +10,7 @@ import numpy ...@@ -10,6 +10,7 @@ import numpy
from theano import Op, Apply, shared, config from theano import Op, Apply, shared, config
from theano.tensor import raw_random, TensorType, as_tensor_variable, get_vector_length, cast, opt from theano.tensor import raw_random, TensorType, as_tensor_variable, get_vector_length, cast, opt
from theano.tensor import zeros_like, sqrt, log, sin, cos, join
from theano.compile import optdb from theano.compile import optdb
from theano.gof import local_optimizer from theano.gof import local_optimizer
...@@ -650,6 +651,49 @@ class MRG_RandomStreams(object): ...@@ -650,6 +651,49 @@ class MRG_RandomStreams(object):
else: else:
raise NotImplementedError("MRG_RandomStreams.binomial with n > 1") raise NotImplementedError("MRG_RandomStreams.binomial with n > 1")
def normal(self, size=None, avg=0.0, std=1.0, ndim=None, dtype=config.floatX):
    """Return symbolic normally-distributed samples built with Box-Muller.

    An even number of uniform samples is drawn with ``self.uniform`` and
    split into two halves, U1 and U2.  The Box-Muller transform
    (http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform) turns them
    into ``sqrt(-2 ln U1) * cos(2 pi U2)`` and ``sqrt(-2 ln U1) * sin(2 pi U2)``,
    which are joined, trimmed back to the requested count, scaled by `std`
    and shifted by `avg`.

    :param size: shape of the result; when truthy the samples are reshaped
        to it.  It is also passed to ``self.n_streams`` to obtain the number
        of samples to draw (presumably the number of elements implied by
        `size` -- confirm against ``n_streams``).
    :param avg: mean of the returned distribution.
    :param std: standard deviation of the returned distribution.
    :param ndim: accepted for signature compatibility; not used here.
    :param dtype: dtype forwarded to ``self.uniform``.
    :return: the symbolic samples, ``avg + std * z``.
    """
    n_samples = self.n_streams(size)

    # Box-Muller consumes uniforms in pairs, so round an odd request up by
    # one and remember to drop the surplus sample at the end.
    evened = n_samples % 2 == 1
    if evened:
        n_samples += 1

    flattened = self.uniform(size=(n_samples,), dtype=dtype)

    # Floor division keeps the slice bound an integer even when true
    # division is in effect (``from __future__ import division``).
    half = n_samples // 2
    U1 = flattened[:half]
    U2 = flattened[half:]

    sqrt_ln_U1 = sqrt(-2.0 * log(U1))
    # Symbolic variables do not support item assignment, so the two halves
    # are computed separately and concatenated instead of being written
    # into a preallocated vector.
    first_half = sqrt_ln_U1 * cos(2.0 * numpy.pi * U2)
    second_half = sqrt_ln_U1 * sin(2.0 * numpy.pi * U2)
    normal_samples = join(0, first_half, second_half)

    if evened:
        final_samples = normal_samples[:-1]
    else:
        final_samples = normal_samples

    final_samples = avg + std * final_samples

    if size:
        final_samples = final_samples.reshape(size)

    return final_samples
@local_optimizer([None]) @local_optimizer([None])
def mrg_random_make_inplace(node): def mrg_random_make_inplace(node):
op = node.op op = node.op
...@@ -734,3 +778,78 @@ def test_rng0(): ...@@ -734,3 +778,78 @@ def test_rng0():
basictest(ff, 1000, prefix='numpy') basictest(ff, 1000, prefix='numpy')
def test_normal0():
def basictest(f, steps, target_avg, target_std, prefix=""):
dt = 0.0
avg_std = 0.0
for i in xrange(steps):
t0 = time.time()
ival = f()
dt += time.time() - t0
ival = numpy.asarray(ival)
if i == 0:
mean = numpy.array(ival, copy=True)
avg_std = numpy.std(ival)
else:
alpha = 1.0 / (1+i)
mean = alpha * ival + (1-alpha)*mean
avg_std = alpha * numpy.std(ival) + (1-alpha)*avg_std
print prefix, 'mean', numpy.mean(mean)
assert abs(numpy.mean(mean) - target_avg) < .01, 'bad mean?'
print prefix, 'std', avg_std
assert abs(avg_std - target_std) < .01, 'bad std?'
print prefix, 'time', dt
print prefix, 'elements', steps*sample_size[0]*sample_size[1]
print prefix, 'samples/sec', steps*sample_size[0]*sample_size[1] / dt
sample_size = (999,100)
print ''
print 'ON CPU:'
R = MRG_RandomStreams(234, use_cuda=False)
n = R.normal(size=sample_size, avg=-5.0, std=2.0)
f = theano.function([], n)
theano.printing.debugprint(f)
print 'random?[:10]\n', f()[0,0:10]
basictest(f, 50, -5.0, 2.0, prefix='mrg ')
sys.stdout.flush()
# now with odd number of samples
sample_size = (999,99)
print ''
print 'ON GPU:'
R = MRG_RandomStreams(234, use_cuda=True)
n = R.normal(size=sample_size, avg=-5.0, std=2.0, dtype='float32')
assert n.dtype == 'float32' #well, it's really that this test w GPU doesn't make sense otw
f = theano.function([], theano.Out(
theano.sandbox.cuda.basic_ops.gpu_from_host(n),
borrow=True))
theano.printing.debugprint(f)
print 'random?[:10]\n', numpy.asarray(f())[0,0:10]
basictest(f, 50, -5.0, 2.0, prefix='gpu mrg ')
sys.stdout.flush()
print ''
print 'ON CPU w NUMPY:'
RR = theano.tensor.shared_randomstreams.RandomStreams(234)
nn = RR.normal(size=sample_size, avg=-5.0, std=2.0)
ff = theano.function([], nn)
basictest(ff, 50, -5.0, 2.0, prefix='numpy ')
#if __name__ == '__main__':
# # with: export THEANO_FLAGS=device=gpu0,floatX=float32
# test_normal0()
...@@ -1414,12 +1414,16 @@ class Composite(ScalarOp): ...@@ -1414,12 +1414,16 @@ class Composite(ScalarOp):
name = "V%%(id)s_tmp%i" % i name = "V%%(id)s_tmp%i" % i
subd[output] = name subd[output] = name
_c_code += "%s %s;\n" % (output.type.dtype_specs()[1], name) _c_code += "%s %s;\n" % (output.type.dtype_specs()[1], name)
_c_code += node.op.c_code(node,
s = node.op.c_code(node,
"%(name)s", "%(name)s",
[subd[input] for input in node.inputs], [subd[input] for input in node.inputs],
[subd[output] for output in node.outputs], [subd[output] for output in node.outputs],
dict(fail = "%(fail)s", dict(fail = "%(fail)s",
id = "%%(id)s_%i" % j)) id = "%%(id)s_%i" % j))
if any([isinstance(x.op,Mod) for x in env.toposort()]):
s = s.replace('% ','%% ')
_c_code += s
_c_code += "\n" _c_code += "\n"
_c_code += "}\n" _c_code += "}\n"
...@@ -1481,6 +1485,9 @@ class Composite(ScalarOp): ...@@ -1481,6 +1485,9 @@ class Composite(ScalarOp):
return self._c_code % d return self._c_code % d
def c_code_cache_version(self):
    """Return the cache version of this composite's generated C code.

    The version is ``(1,)`` followed by the version of every op found in
    ``self.env.toposort()``, so a change in any inner op's C code also
    changes the composite's version.

    If any inner op reports an empty (falsy) version, an empty tuple is
    returned instead.  In Theano an empty version marks C code as
    unversioned, and the Gemm-related ops in this code base likewise
    return an empty version unchanged rather than prefixing it.

    :return: a tuple of version components, or ``()`` when at least one
        inner op is unversioned.
    """
    versions = [1]
    for node in self.env.toposort():
        op_version = node.op.c_code_cache_version()
        if not op_version:
            # One unversioned op makes the whole composite unversioned.
            return ()
        versions.append(op_version)
    return tuple(versions)
def __eq__(self, other): def __eq__(self, other):
if self is other: return True if self is other: return True
if not isinstance(other, self.__class__): return False if not isinstance(other, self.__class__): return False
......
...@@ -100,18 +100,24 @@ class GemmRelated(Op): ...@@ -100,18 +100,24 @@ class GemmRelated(Op):
#ifndef MOD #ifndef MOD
#define MOD % #define MOD %
#endif #endif
static double time_time() // a time function like time.time()
{
struct timeval tv;
gettimeofday(&tv, 0);
return (double) tv.tv_sec + (double) tv.tv_usec / 1000000.0;
}
""" """
return blas_header_text() + mod_str return blas_header_text() + mod_str
def c_headers(self): def c_headers(self):
# std.cout doesn't require the '%' symbol to print stuff... # std.cout doesn't require the '%' symbol to print stuff...
# so it works much better with python's string-substitution stuff. # so it works much better with python's string-substitution stuff.
return ['<iostream>'] return ['<iostream>', '<time.h>', '<sys/time.h>']
def c_libraries(self): def c_libraries(self):
return ldflags() return ldflags()
def c_code_cache_version(self): # code_cache_version is built by subclasses from
return (0,0,1) # build_gemm_version
def c_compile_args(self): def c_compile_args(self):
return ldflags(libs=False, flags=True) return ldflags(libs=False, flags=True)
...@@ -247,6 +253,7 @@ class GemmRelated(Op): ...@@ -247,6 +253,7 @@ class GemmRelated(Op):
char T = 'T'; char T = 'T';
int Nz0 = Nz[0], Nz1 = Nz[1], Nx1 = Nx[1]; int Nz0 = Nz[0], Nz1 = Nz[1], Nx1 = Nx[1];
//std::cerr << (unit/256) MOD 16 << (unit / 16) MOD 16 << unit MOD 16<< '\\n'; //std::cerr << (unit/256) MOD 16 << (unit / 16) MOD 16 << unit MOD 16<< '\\n';
//double t0 = time_time();
switch(unit) switch(unit)
{ {
case 0x000: sgemm_(&N, &N, &Nz1, &Nz0, &Nx1, &a, y, &sy_0, x, &sx_0, &b, z, &sz_0); break; case 0x000: sgemm_(&N, &N, &Nz1, &Nz0, &Nx1, &a, y, &sy_0, x, &sx_0, &b, z, &sz_0); break;
...@@ -259,6 +266,7 @@ class GemmRelated(Op): ...@@ -259,6 +266,7 @@ class GemmRelated(Op):
case 0x111: sgemm_(&N, &N, &Nz0, &Nz1, &Nx1, &a, x, &sx_1, y, &sy_1, &b, z, &sz_1); break; case 0x111: sgemm_(&N, &N, &Nz0, &Nz1, &Nx1, &a, x, &sx_1, y, &sy_1, &b, z, &sz_1); break;
default: PyErr_SetString(PyExc_ValueError, "some matrix has no unit stride"); %(fail)s; default: PyErr_SetString(PyExc_ValueError, "some matrix has no unit stride"); %(fail)s;
}; };
//fprintf(stderr, "Calling sgemm %%i %%i %%i %%i took %%f\\n", unit, Nz1, Nz0, Nx1, time_time() - t0);
""" """
case_double = """ case_double = """
...@@ -278,6 +286,7 @@ class GemmRelated(Op): ...@@ -278,6 +286,7 @@ class GemmRelated(Op):
char T = 'T'; char T = 'T';
int Nz0 = Nz[0], Nz1 = Nz[1], Nx1 = Nx[1]; int Nz0 = Nz[0], Nz1 = Nz[1], Nx1 = Nx[1];
//std::cerr << (unit/256) MOD 16 << (unit / 16) MOD 16 << unit MOD 16<< '\\n'; //std::cerr << (unit/256) MOD 16 << (unit / 16) MOD 16 << unit MOD 16<< '\\n';
//double t0 = time_time();
switch(unit) switch(unit)
{ {
case 0x000: dgemm_(&N, &N, &Nz1, &Nz0, &Nx1, &a, y, &sy_0, x, &sx_0, &b, z, &sz_0); break; case 0x000: dgemm_(&N, &N, &Nz1, &Nz0, &Nx1, &a, y, &sy_0, x, &sx_0, &b, z, &sz_0); break;
...@@ -290,6 +299,7 @@ class GemmRelated(Op): ...@@ -290,6 +299,7 @@ class GemmRelated(Op):
case 0x111: dgemm_(&N, &N, &Nz0, &Nz1, &Nx1, &a, x, &sx_1, y, &sy_1, &b, z, &sz_1); break; case 0x111: dgemm_(&N, &N, &Nz0, &Nz1, &Nx1, &a, x, &sx_1, y, &sy_1, &b, z, &sz_1); break;
default: PyErr_SetString(PyExc_ValueError, "some matrix has no unit stride"); %(fail)s; default: PyErr_SetString(PyExc_ValueError, "some matrix has no unit stride"); %(fail)s;
}; };
//fprintf(stderr, "Calling dgemm %%i %%i %%i %%i took %%f\\n", unit, Nz1, Nz0, Nx1, time_time()- t0);
""" """
end_switch_typenum = """ end_switch_typenum = """
...@@ -319,7 +329,7 @@ class GemmRelated(Op): ...@@ -319,7 +329,7 @@ class GemmRelated(Op):
self.end_switch_typenum), '') self.end_switch_typenum), '')
def build_gemm_version(self): def build_gemm_version(self):
return (2,) return (4,)
class Gemm(GemmRelated): class Gemm(GemmRelated):
"""In-place version of matrix-matrix multiplication (with accumulation): """In-place version of matrix-matrix multiplication (with accumulation):
...@@ -442,6 +452,7 @@ class Gemm(GemmRelated): ...@@ -442,6 +452,7 @@ class Gemm(GemmRelated):
dims[0] = %(_z)s->dimensions[0]; dims[0] = %(_z)s->dimensions[0];
dims[1] = %(_z)s->dimensions[1]; dims[1] = %(_z)s->dimensions[1];
%(_zout)s = (PyArrayObject*)PyArray_SimpleNew(2, dims, type_num_%(_z)s); %(_zout)s = (PyArrayObject*)PyArray_SimpleNew(2, dims, type_num_%(_z)s);
//fprintf(stderr, "Gemm Allocating %%i %%i\\n", dims[0], dims[1]);
if(!%(_zout)s) { if(!%(_zout)s) {
PyErr_SetString(PyExc_MemoryError, "failed to alloc gemm_no_inplace output"); PyErr_SetString(PyExc_MemoryError, "failed to alloc gemm_no_inplace output");
%(fail)s %(fail)s
...@@ -515,7 +526,11 @@ class Gemm(GemmRelated): ...@@ -515,7 +526,11 @@ class Gemm(GemmRelated):
return full_code return full_code
def c_code_cache_version(self): def c_code_cache_version(self):
return (3,) + self.build_gemm_version() gv = self.build_gemm_version()
if gv:
return (3,) + gv
else:
return gv
gemm_inplace = Gemm(inplace=True) gemm_inplace = Gemm(inplace=True)
gemm_no_inplace = Gemm(inplace=False) gemm_no_inplace = Gemm(inplace=False)
...@@ -817,6 +832,7 @@ class Dot22(GemmRelated): ...@@ -817,6 +832,7 @@ class Dot22(GemmRelated):
dims[0] = %(_x)s->dimensions[0]; dims[0] = %(_x)s->dimensions[0];
dims[1] = %(_y)s->dimensions[1]; dims[1] = %(_y)s->dimensions[1];
%(_zout)s = (PyArrayObject*)PyArray_SimpleNew(2, dims, type_num_%(_x)s); %(_zout)s = (PyArrayObject*)PyArray_SimpleNew(2, dims, type_num_%(_x)s);
//fprintf(stderr, "Dot Allocating %%i %%i\\n", dims[0], dims[1]);
if(!%(_zout)s) { if(!%(_zout)s) {
PyErr_SetString(PyExc_MemoryError, "failed to alloc dot22 output"); PyErr_SetString(PyExc_MemoryError, "failed to alloc dot22 output");
%(fail)s %(fail)s
...@@ -841,7 +857,11 @@ class Dot22(GemmRelated): ...@@ -841,7 +857,11 @@ class Dot22(GemmRelated):
full_code = self.build_gemm_call() % dict(locals(), **sub) full_code = self.build_gemm_call() % dict(locals(), **sub)
return full_code return full_code
def c_code_cache_version(self): def c_code_cache_version(self):
return (1,) + self.build_gemm_version() gv = self.build_gemm_version()
if gv:
return (1,) + gv
else:
return gv
_dot22 = Dot22() _dot22 = Dot22()
...@@ -947,7 +967,11 @@ class Dot22Scalar(GemmRelated): ...@@ -947,7 +967,11 @@ class Dot22Scalar(GemmRelated):
full_code = self.build_gemm_call() % dict(locals(), **sub) full_code = self.build_gemm_call() % dict(locals(), **sub)
return full_code return full_code
def c_code_cache_version(self): def c_code_cache_version(self):
return (2,) + self.build_gemm_version() gv = self.build_gemm_version()
if gv:
return (2,) + gv
else:
return gv
_dot22scalar = Dot22Scalar() _dot22scalar = Dot22Scalar()
......
""" """
Contains an op for convolving input images with a set of filters. This was Contains an Op for convolving input images with a set of filters. This was
developed especially for Convolutional Neural Networks. developed especially for Convolutional Neural Networks.
For related ops, including downsampling and subsampling, see
tensor.signal and tensor.signal.downsample.
See especially conv2d().
""" """
__docformat__ = "restructuredtext en" __docformat__ = "restructuredtext en"
...@@ -764,7 +769,6 @@ using namespace std; ...@@ -764,7 +769,6 @@ using namespace std;
d["self_dx"]=self.dx d["self_dx"]=self.dx
d["self_dy"]=self.dy d["self_dy"]=self.dy
d["mode"]=self.out_mode.upper() d["mode"]=self.out_mode.upper()
d["mode"]=self.out_mode.upper()
d["affectation"]="=" d["affectation"]="="
if all_shape: if all_shape:
d["self_bsize"]=self.bsize d["self_bsize"]=self.bsize
...@@ -910,7 +914,7 @@ if(%(filtersflipped)s->nd==3){ ...@@ -910,7 +914,7 @@ if(%(filtersflipped)s->nd==3){
kerns_dim[1]=%(filtersflipped)s->dimensions[1]; kerns_dim[1]=%(filtersflipped)s->dimensions[1];
kerns_dim[0]=%(filtersflipped)s->dimensions[0]; kerns_dim[0]=%(filtersflipped)s->dimensions[0];
}else{ }else{
std:stringstream temp; std::stringstream temp;
temp << "nddim="<<%(filtersflipped)s->nd; temp << "nddim="<<%(filtersflipped)s->nd;
std::string param = temp.str(); std::string param = temp.str();
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
...@@ -1145,7 +1149,7 @@ if(%(filtersflipped)s->nd==3){ ...@@ -1145,7 +1149,7 @@ if(%(filtersflipped)s->nd==3){
kerns_dim[1]=%(filtersflipped)s->dimensions[1]; kerns_dim[1]=%(filtersflipped)s->dimensions[1];
kerns_dim[0]=%(filtersflipped)s->dimensions[0]; kerns_dim[0]=%(filtersflipped)s->dimensions[0];
}else{ }else{
std:stringstream temp; std::stringstream temp;
temp << "nddim="<<%(filtersflipped)s->nd; temp << "nddim="<<%(filtersflipped)s->nd;
std::string param = temp.str(); std::string param = temp.str();
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
...@@ -1377,7 +1381,7 @@ if(%(img2d)s->nd==2){ ...@@ -1377,7 +1381,7 @@ if(%(img2d)s->nd==2){
img2d_dim[1]=%(img2d)s->dimensions[1]; img2d_dim[1]=%(img2d)s->dimensions[1];
img2d_dim[0]=%(img2d)s->dimensions[0]; img2d_dim[0]=%(img2d)s->dimensions[0];
}else { }else {
std:stringstream temp; std::stringstream temp;
temp << "nddim="<<%(img2d)s->nd; temp << "nddim="<<%(img2d)s->nd;
std::string param = temp.str(); std::string param = temp.str();
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
......
...@@ -143,16 +143,18 @@ def speed_multilayer_conv(): ...@@ -143,16 +143,18 @@ def speed_multilayer_conv():
validate=False# we don't validate the result to have it much faster! validate=False# we don't validate the result to have it much faster!
verbose=1 verbose=1
unroll_batch = [1,2,4,5,10,20] unroll_batch = [1,2,3,4,5,10]#15, 30, 60 always much slower
unroll_kern = [1,2,4,5,10,20] unroll_kern = [1,2,3,4,5,10]#15, 30, 60 always much slower
unroll_batch = [1,4,5] #unroll_batch = [1,4,5]
unroll_kern = [1,4,5] #unroll_kern = [1,4,5]
#unroll_batch = [1,4]
#unroll_kern = [1,4]
unroll_patch = [True, False] unroll_patch = [True, False]
bsize = 20 # batch size bsize = 60 # batch size
imshp_start = (1,48,48)#un square shape to test more corner case. imshp_start = (1,48,48)#un square shape to test more corner case.
kshps = ([11,12],[12,11])#un square shape to test more corner case. kshps = ([11,12],[12,11])#un square shape to test more corner case.
nkerns = [20,20] # per output pixel nkerns = [60,60] # per output pixel
ssizes = [(1,1),]#(1,1)]#(2,2) bugged ssizes = [(1,1),]#(1,1)]#(2,2) bugged
convmodes = ['valid','full'] convmodes = ['valid','full']
do_convolve2=False do_convolve2=False
...@@ -212,8 +214,10 @@ def speed_multilayer_conv(): ...@@ -212,8 +214,10 @@ def speed_multilayer_conv():
best=N.asarray(best) best=N.asarray(best)
worst=N.asarray(worst) worst=N.asarray(worst)
print "timing for unrolled version" print "timing for unrolled version"
print t_b_k print "unroll_batch/unroll_kern valid_mode full_mode"
print t for n_b in range(len(unroll_batch)):
for n_k in range(len(unroll_kern)):
print unroll_batch[n_b],"/",unroll_kern[n_k], " ",t[n_b,n_k]
t_detail=t t_detail=t
t = t.sum(axis=2) t = t.sum(axis=2)
print "max %.3fs"%t.max(), "max param(batch unloop size/kernel unloop size)", t_b_k[t.argmax()] print "max %.3fs"%t.max(), "max param(batch unloop size/kernel unloop size)", t_b_k[t.argmax()]
......
...@@ -88,10 +88,10 @@ class TestConv2D(unittest.TestCase): ...@@ -88,10 +88,10 @@ class TestConv2D(unittest.TestCase):
Tests that basic convolutions work for odd and even dimensions of image and filter Tests that basic convolutions work for odd and even dimensions of image and filter
shapes, as well as rectangular images and filters. shapes, as well as rectangular images and filters.
""" """
self.validate((3,2,8,8), (4,2,5,5), 'valid') self.validate((3,2,8,8), (4,2,5,5), 'valid', verify_grad=False)
self.validate((3,2,7,5), (5,2,2,3), 'valid') self.validate((3,2,7,5), (5,2,2,3), 'valid')
self.validate((3,2,7,5), (5,2,3,2), 'valid') self.validate((3,2,7,5), (5,2,3,2), 'valid', verify_grad=False)
self.validate((3,2,8,8), (4,2,5,5), 'full') self.validate((3,2,8,8), (4,2,5,5), 'full', verify_grad=False)
self.validate((3,2,7,5), (5,2,2,3), 'full') self.validate((3,2,7,5), (5,2,2,3), 'full')
# test filter same size as input # test filter same size as input
...@@ -105,7 +105,7 @@ class TestConv2D(unittest.TestCase): ...@@ -105,7 +105,7 @@ class TestConv2D(unittest.TestCase):
""" """
self.validate((3,2,7,5), (5,2,2,3), 'valid', unroll_patch=False) self.validate((3,2,7,5), (5,2,2,3), 'valid', unroll_patch=False)
self.validate((3,2,7,5), (5,2,2,3), 'full', unroll_patch=False) self.validate((3,2,7,5), (5,2,2,3), 'full', unroll_patch=False)
self.validate((3,2,3,3), (4,2,3,3), 'valid', unroll_patch=False) self.validate((3,2,3,3), (4,2,3,3), 'valid', unroll_patch=False, verify_grad=False)
def test_unroll_special(self): def test_unroll_special(self):
""" """
...@@ -175,7 +175,17 @@ class TestConv2D(unittest.TestCase): ...@@ -175,7 +175,17 @@ class TestConv2D(unittest.TestCase):
""" """
try: try:
self.validate((3,2,8,8), (4,2,5,5), 'valid', input = T.dmatrix()) self.validate((3,2,8,8), (4,2,5,5), 'valid', input = T.dmatrix())
# should never reach here
self.fail()
except:
pass
try:
self.validate((3,2,8,8), (4,2,5,5), 'valid', filters = T.dvector()) self.validate((3,2,8,8), (4,2,5,5), 'valid', filters = T.dvector())
# should never reach here
self.fail()
except:
pass
try:
self.validate((3,2,8,8), (4,2,5,5), 'valid', input = T.dtensor3()) self.validate((3,2,8,8), (4,2,5,5), 'valid', input = T.dtensor3())
# should never reach here # should never reach here
self.fail() self.fail()
......
...@@ -224,7 +224,12 @@ class MakeVector(T.Op): ...@@ -224,7 +224,12 @@ class MakeVector(T.Op):
def __str__(self): def __str__(self):
return self.__class__.__name__ return self.__class__.__name__
def perform(self, node, inputs, (out,)): def perform(self, node, inputs, (out,)):
out[0] = theano._asarray(inputs, dtype=node.outputs[0].dtype) # not calling theano._asarray as optimization
if out[0] is None:
out[0] = theano._asarray(inputs, dtype=node.outputs[0].dtype)
else:
# assume that out has correct dtype. there is no cheap way to check
out[0][...] = inputs
make_vector = MakeVector() make_vector = MakeVector()
...@@ -262,7 +267,10 @@ class Shape_i(T.Op): ...@@ -262,7 +267,10 @@ class Shape_i(T.Op):
raise TypeError('x has too few dimensions for Shape_i', (x, self.i)) raise TypeError('x has too few dimensions for Shape_i', (x, self.i))
return T.Apply(self, [x], [T.lscalar()]) return T.Apply(self, [x], [T.lscalar()])
def perform(self, node, (x, ), (out, )): def perform(self, node, (x, ), (out, )):
out[0] = theano._asarray(x.shape[self.i], dtype = 'int64') if out[0] is None:
out[0] = theano._asarray(x.shape[self.i], dtype='int64')
else:
out[0][...] = x.shape[self.i]
def grad(self, (x,), (gz,)): def grad(self, (x,), (gz,)):
return [None] return [None]
......
...@@ -603,8 +603,12 @@ def test_dot22scalar(): ...@@ -603,8 +603,12 @@ def test_dot22scalar():
#currently the canonizer don't always merge all Mul together... #currently the canonizer don't always merge all Mul together...
#that force the optimizer to make a recursive search witch it don't do now. #that force the optimizer to make a recursive search witch it don't do now.
#but it do it for 1 level of recursion. #but it do it for 1 level of recursion.
# assert _dot22scalar in [x.op for x in topo] # assert _dot22scalar in [x.op for x in topo]
# assert len(topo)==2 # assert len(topo)==2
### Fred,
### What are you talking about?
### -James (March 28 2010)
f(av,bv,cv) f(av,bv,cv)
f = theano.function([a,b,c],c * a*0.2*T.dot(a,b),mode=m2) f = theano.function([a,b,c],c * a*0.2*T.dot(a,b),mode=m2)
topo = f.maker.env.toposort() topo = f.maker.env.toposort()
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论