提交 c239bfec authored 作者: Olivier Delalleau's avatar Olivier Delalleau

Merged

......@@ -760,6 +760,10 @@ def function(inputs, outputs, mode=None, accept_inplace = False):
- EXPENSIVE_OPTIMIZATION TODO: NotImplemented
- PROFILE_MODE : allow to print a profile mode with mode.print_summary
- DEBUG_MODE : make all the check that we taught of(compare python and c,...)
:param accept_inplace: True iff the graph can contain inplace operations prior to the
optimization phase (default is False)
......
import time
import time, atexit
from ..gof.link import WrapLinkerMany
from ..gof.cutils import run_cthunk
from ..compile.mode import Mode, predefined_linkers
from ..compile.mode import Mode, predefined_linkers, register_mode, predefined_modes
from ..gof.cc import OpWiseCLinker
class ProfileMode(Mode):
......@@ -110,3 +110,19 @@ class ProfileMode(Mode):
sum(f for f, t, a in sotimes[n_ops_to_print:])*100,
sum(t for f, t, a in sotimes[n_ops_to_print:]))
print '(*) Op is running a c implementation'
register_mode('PROFILE_MODE',ProfileMode())
def atexit_print_default_profile_mode():
    """Print the summary of the predefined mode PROFILE_MODE, if it was used.

    This exists so that the profiling summary is printed at program exit
    when running with THEANO_DEFAULT_MODE=PROFILE_MODE (see the
    atexit.register call just below this function).
    """
    prof_mode=predefined_modes["PROFILE_MODE"]
    # local_time[0] > 0 means the mode actually timed at least one call,
    # i.e. PROFILE_MODE was really used by some compiled function.
    if prof_mode.local_time[0]>0: prof_mode.print_summary()
#Register atexit_print_default_profile_mode to have the summary of the
#predefined mode PROFILE_MODE if it is used printed when the program terminate.
atexit.register(atexit_print_default_profile_mode)
"""Define RandomStreams, providing random number variables for Theano graphs."""
__docformat__ = "restructuredtext en"
import sys
import numpy
from ...gof import Container
from ...tensor import raw_random
from sharedvalue import SharedVariable, shared_constructor, shared
class RandomStateSharedVariable(SharedVariable):
    # Marker subclass: a SharedVariable whose .value is a
    # numpy.random.RandomState instance (constructed by
    # randomstate_constructor below).  No extra behavior is added.
    pass
@shared_constructor
def randomstate_constructor(value, name=None, strict=False):
    """SharedVariable constructor for numpy.random.RandomState values.

    :param value: the initial value; must be a numpy.random.RandomState
    :param name: optional name for the resulting Variable
    :param strict: passed through to the SharedVariable constructor
    :returns: a RandomStateSharedVariable wrapping `value`
    :raises TypeError: if `value` is not a numpy.random.RandomState
    """
    if not isinstance(value, numpy.random.RandomState):
        # Include a message and the offending value: shared() dispatches on
        # TypeError, and a bare TypeError makes dispatch failures opaque.
        raise TypeError('randomstate_constructor expected a '
                        'numpy.random.RandomState instance', value)
    return RandomStateSharedVariable(
            type=raw_random.random_state_type,
            value=value,
            name=name,
            strict=strict)
class RandomStreams(object):
    """Container of random-number streams, with an interface similar to
    numpy.random (numpy.random.RandomState).
    """

    # Class-level defaults; __init__ replaces each with an instance value.
    random_state_variables = []   # list of (input_r, output_r) update pairs
    default_instance_seed = None  # None or int; default seed for seed()
    gen_seedgen = None            # numpy.random.RandomState used by gen()

    def __init__(self, seed=None):
        """
        :type seed: None or int
        :param seed: a default seed to initialize the RandomState instances
            after build.  See `RandomStreamsInstance.__init__` for details.
        """
        super(RandomStreams, self).__init__()
        self.random_state_variables = []
        self.default_instance_seed = seed
        self.gen_seedgen = numpy.random.RandomState(seed)

    def updates(self):
        """Return a fresh list of the (input_r, output_r) update pairs."""
        return [pair for pair in self.random_state_variables]

    def seed(self, seed=None):
        """Re-initialize each random stream.

        :param seed: each random stream will be assigned a unique state that
            depends deterministically on this value.
        :type seed: None or integer in range 0 to 2**30
        :rtype: None
        """
        if seed is None:
            seed = self.default_instance_seed
        # One seed-generator drives every stream so the whole collection is
        # reproducible from a single integer.
        seedgen = numpy.random.RandomState(seed)
        for old_r, _new_r in self.random_state_variables:
            stream_seed = seedgen.randint(2**30)
            old_r.value = numpy.random.RandomState(int(stream_seed))

    def __getitem__(self, item):
        """Retrieve the numpy RandomState instance associated with a stream.

        :param item: a variable of type RandomStateType, associated with
            this RandomStream
        :rtype: numpy RandomState (or None, before initialize)
        :note: kept for compatibility with
            `tensor.randomstreams.RandomStreams`; the simpler syntax
            ``item.rng.value`` is also valid.
        """
        return item.value

    def __setitem__(self, item, val):
        """Set the numpy RandomState instance associated with a stream.

        :param item: a variable of type RandomStateType, associated with
            this RandomStream
        :param val: the new value
        :type val: numpy RandomState
        :rtype: None
        :note: kept for compatibility with
            `tensor.randomstreams.RandomStreams`; the simpler syntax
            ``item.rng.value = val`` is also valid.
        """
        item.value = val

    def gen(self, op, *args, **kwargs):
        """Create a new random stream in this container.

        :param op: a RandomFunction instance to apply
        :param args: interpreted by `op`
        :param kwargs: interpreted by `op`
        :returns: the symbolic random draw part of op()'s return value.
            The updated RandomStateType Variable is stored for `build` time.
        :rtype: TensorVariable
        """
        stream_seed = int(self.gen_seedgen.randint(2**30))
        rng_var = shared(numpy.random.RandomState(stream_seed))
        new_rng, sample = op(rng_var, *args, **kwargs)
        # Expose the generator on the output so __getitem__/__setitem__
        # (and the ``out.rng.value`` idiom) can reach it.
        sample.rng = rng_var
        self.random_state_variables.append((rng_var, new_rng))
        return sample

    def binomial(self, *args, **kwargs):
        """Return a symbolic binomial sample (shortcut for `self.gen`)."""
        return self.gen(raw_random.binomial, *args, **kwargs)

    def uniform(self, *args, **kwargs):
        """Return a symbolic uniform sample (shortcut for `self.gen`)."""
        return self.gen(raw_random.uniform, *args, **kwargs)

    def normal(self, *args, **kwargs):
        """Return a symbolic normal sample (shortcut for `self.gen`)."""
        return self.gen(raw_random.normal, *args, **kwargs)

    def random_integers(self, *args, **kwargs):
        """Return a symbolic random integer sample (shortcut for `self.gen`)."""
        return self.gen(raw_random.random_integers, *args, **kwargs)
__docformat__ = "restructuredtext en"
import sys
import unittest
import numpy
from theano.tensor import raw_random
from theano.compile.sandbox.shared_randomstreams import RandomStreams
from theano.compile.sandbox.pfunc import pfunc
from theano import tensor
from theano import compile, gof
class T_RandomStreams(unittest.TestCase):
    """Check RandomStreams draws against equivalently-seeded numpy RNGs."""

    def test_basics(self):
        random = RandomStreams(234)
        fn = pfunc([], random.uniform((2,2)), updates=random.updates())
        gn = pfunc([], random.normal((2,2)), updates=random.updates())

        fn_val0 = fn()
        fn_val1 = fn()
        gn_val0 = gn()  # draw once from the normal stream too

        # Reproduce the seeding done by RandomStreams(234).gen(): the
        # stream's RandomState is seeded from RandomState(234).randint.
        rng_seed = numpy.random.RandomState(234).randint(2**30)
        rng = numpy.random.RandomState(int(rng_seed))  # int() is for 32bit

        numpy_val0 = rng.uniform(size=(2,2))
        numpy_val1 = rng.uniform(size=(2,2))

        # Leftover debug prints removed; the assertions alone cover this.
        assert numpy.all(fn_val0 == numpy_val0)
        assert numpy.all(fn_val1 == numpy_val1)

    def test_seed_fn(self):
        random = RandomStreams(234)
        fn = pfunc([], random.uniform((2,2)), updates=random.updates())
        # Re-seeding after building must override the constructor seed.
        random.seed(888)

        fn_val0 = fn()
        fn_val1 = fn()

        rng_seed = numpy.random.RandomState(888).randint(2**30)
        rng = numpy.random.RandomState(int(rng_seed))  # int() is for 32bit

        numpy_val0 = rng.uniform(size=(2,2))
        numpy_val1 = rng.uniform(size=(2,2))
        assert numpy.all(fn_val0 == numpy_val0)
        assert numpy.all(fn_val1 == numpy_val1)

    def test_getitem(self):
        random = RandomStreams(234)
        out = random.uniform((2,2))
        fn = pfunc([], out, updates=random.updates())
        random.seed(888)

        rng = numpy.random.RandomState()
        rng.set_state(random[out.rng].get_state())  # tests __getitem__

        fn_val0 = fn()
        fn_val1 = fn()
        numpy_val0 = rng.uniform(size=(2,2))
        numpy_val1 = rng.uniform(size=(2,2))
        assert numpy.all(fn_val0 == numpy_val0)
        assert numpy.all(fn_val1 == numpy_val1)

    def test_setitem(self):
        random = RandomStreams(234)
        out = random.uniform((2,2))
        fn = pfunc([], out, updates=random.updates())
        random.seed(888)

        rng = numpy.random.RandomState(823874)
        random[out.rng] = numpy.random.RandomState(823874)  # tests __setitem__

        fn_val0 = fn()
        fn_val1 = fn()
        numpy_val0 = rng.uniform(size=(2,2))
        numpy_val1 = rng.uniform(size=(2,2))
        assert numpy.all(fn_val0 == numpy_val0)
        assert numpy.all(fn_val1 == numpy_val1)
if __name__ == '__main__':
    # Run only this module's tests through theano's test entry point.
    from theano.tests import main
    main("test_randomstreams")
......@@ -72,7 +72,7 @@ class BROKEN_ON_PURPOSE_StructuredDotCSC(gof.Op):
|| (%(z)s->dimensions[1] != %(b)s->dimensions[1])
)
{
if (%(z)s) Py_DECREF(%(z)s);
{Py_XDECREF(%(z)s);}
npy_intp dims[] = {0,0};
dims[0] = ((npy_int32 *)%(a_nrows)s->data)[0];
dims[1] = %(b)s->dimensions[1];
......@@ -189,13 +189,13 @@ class WeirdBrokenOp(gof.Op):
def c_code(self, node, name, (a,), (z,), sub):
if "inplace" in self.behaviour:
z_code = """
if (%(z)s) Py_DECREF(%(z)s);
{Py_XDECREF(%(z)s);}
Py_INCREF(%(a)s);
%(z)s = %(a)s;
"""
else:
z_code = """
if (%(z)s) Py_DECREF(%(z)s);
{Py_XDECREF(%(z)s);}
%(z)s = (PyArrayObject*) PyArray_SimpleNew(1, %(a)s->dimensions, %(a)s->descr->type_num);
"""
prep_vars = """
......
......@@ -144,18 +144,18 @@ def struct_gen(args, struct_builders, blocks, sub):
PyObject* err_msg = NULL;
PyObject* err_traceback = NULL;
PyErr_Fetch(&err_type, &err_msg, &err_traceback);
if (!err_type) {err_type = Py_None; Py_XINCREF(Py_None);}
if (!err_msg) {err_msg = Py_None; Py_XINCREF(Py_None);}
if (!err_traceback) {err_traceback = Py_None; Py_XINCREF(Py_None);}
if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);}
if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);}
PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
PyList_SET_ITEM(__ERROR, 0, err_type);
PyList_SET_ITEM(__ERROR, 1, err_msg);
PyList_SET_ITEM(__ERROR, 2, err_traceback);
Py_XDECREF(old_err_type);
Py_XDECREF(old_err_msg);
Py_XDECREF(old_err_traceback);
{Py_XDECREF(old_err_type);}
{Py_XDECREF(old_err_msg);}
{Py_XDECREF(old_err_traceback);}
}
// The failure code is returned to index what code block failed.
return %(failure_var)s;
......@@ -222,7 +222,7 @@ def get_c_init(r, name, sub):
"""WRITEME"""
pre = "" """
py_%(name)s = Py_None;
Py_XINCREF(py_%(name)s);
{Py_XINCREF(py_%(name)s);}
""" % locals()
return pre + r.type.c_init(name, sub)
......@@ -230,14 +230,14 @@ def get_c_extract(r, name, sub):
"""WRITEME"""
pre = """
py_%(name)s = PyList_GET_ITEM(storage_%(name)s, 0);
Py_XINCREF(py_%(name)s);
{Py_XINCREF(py_%(name)s);}
""" % locals()
return pre + r.type.c_extract(name, sub)
def get_c_cleanup(r, name, sub):
"""WRITEME"""
post = """
Py_XDECREF(py_%(name)s);
{Py_XDECREF(py_%(name)s);}
""" % locals()
return r.type.c_cleanup(name, sub) + post
......@@ -247,9 +247,9 @@ def get_c_sync(r, name, sub):
if (!%(failure_var)s) {
%(sync)s
PyObject* old = PyList_GET_ITEM(storage_%(name)s, 0);
Py_XINCREF(py_%(name)s);
{Py_XINCREF(py_%(name)s);}
PyList_SET_ITEM(storage_%(name)s, 0, py_%(name)s);
Py_XDECREF(old);
{Py_XDECREF(old);}
}
""" % dict(sync = r.type.c_sync(name, sub), name = name, **sub)
......@@ -526,11 +526,15 @@ class CLinker(link.Linker):
This might contain duplicates.
"""
ret = ["-O3", "-w"]#-w means supress all warnings
# this is the param the -ffast-math activate. I put the explicitly as FillMissing must disable "-ffinite-math-only". Putting -ffast-math would make it disable all other parameter at the same time.
ret = ["-O3"]
# these are the flags that -ffast-math activates. They are listed explicitly because FillMissing must disable some of them; passing -ffast-math itself would make disabling one flag disable all the others at the same time.
ret += ["-fno-math-errno", "-funsafe-math-optimizations",
"-fno-signaling-nans", "-fcx-limited-range",
"-fno-rounding-math", "-ffinite-math-only"]
"-fno-rounding-math", "-ffinite-math-only",
"-Wno-unused-label",#the current code generate label event if they are not used. Could use gcc attribute for those label only
"-Wno-unused-variable",#idem as the precedent
"-Wno-write-strings",#generated by our code generator...
]
for x in [y.type for y in self.variables] + [y.op for y in self.node_order]:
try: ret += x.c_compile_args()
except utils.MethodNotDefined: pass
......
"""Driver for general gradient calculations."""
__docformat__ = "restructuredtext en"
import sys
import gof #, gof.variable
import numpy #for numeric_grad
from gof.python25 import all
import gof.utils
def warning(msg):
# replace this with logger.warning when adding logging support
print >> sys.stderr, 'WARNING', msg
_msg_retType = 'op.grad(...) returned a non-list'
_msg_badlen = 'op.grad(...) returned wrong number of gradients'
def grad_sources_inputs(sources, graph_inputs):
def grad_sources_inputs(sources, graph_inputs, warn_type=True):
"""
A gradient source is a pair (r, g_r), in which r is a variable, and g_r is a
variable that is a gradient wrt r.
A gradient source is a pair (``r``, ``g_r``), in which ``r`` is a `Variable`, and ``g_r`` is a
`Variable` that is a gradient wrt ``r``.
This function traverses the graph backward from the ``r`` sources,
calling ``op.grad(...)`` for all ops with some non-None gradient on an output.
The ``op.grad(...)`` functions are called like this:
This function traverses the graph backward from the 'r' sources,
calling L{Op.grad}(...) when it is provided by an L{Op}, and at least one of the
outputs of the L{Op} has an associated gradient.
.. code-block:: python
op.grad(op.inputs[:], [total_gradient(v for v in op.outputs)])
The L{Op.grad}(...) functions are called as such:
op.grad( op.inputs[0], grad(op.outputs[0]))
This call to ``op.grad`` should return a list or tuple: one symbolic gradient per input.
If ``op`` has a single input, then ``op.grad`` should return a list or tuple of length 1.
This function expects the L{Op.grad}(...) function to return the gradient
expression [variables] associated with the inputs of the L{Op}. The L{Op} should
return a list of variables corresponding to the gradients in the same order
as the inputs. If it has a single output it should return a list or tuple
of length 1.
For each input wrt to which ``op`` is not differentiable, it should return ``None`` instead
of a `Variable` instance.
For each input wrt to which an L{Op} is not differentiable, it should return
None instead of a variable instance.
If a source ``r`` receives a gradient from another source ``r2``, then the effective
gradient on ``r`` is the sum of both gradients.
@type sources: list
@param sources: gradient sources (explained below)
@type graph_inputs: list
@param graph_inputs: variables considered to be constant
:type sources: list of pairs of Variable: (v, gradient-on-v)
:param sources: gradients to back-propagate using chain rule
:type graph_inputs: list of Variable
:param graph_inputs: variables considered to be constant (do not backpropagate through
them)
@rtype: dictionary
@return: dictionary mapping each variable necessary for a source to its gradient.
:rtype: dictionary whose keys and values are of type `Variable`
:return: mapping from each Variable encountered in the backward traversal to its gradient.
"""
gmap = {}
for (r, g_r) in sources:
......@@ -90,8 +101,9 @@ def grad_sources_inputs(sources, graph_inputs):
len(g_inputs),
len(node.inputs))
for ii, (r, g_r) in enumerate(zip(node.inputs, g_inputs)):
if g_r and (r.type != g_r.type):
print 'WARNING: %s.grad returned a different type for input %i: %s vs. %s'%(node.op, ii, r.type, g_r.type)
if warn_type:
if g_r and (getattr(r,'type',0) != getattr(g_r,'type', 1)):
warning('%s.grad returned a different type for input %i: %s vs. %s'%(node.op, ii, r, g_r))
if g_r and len(sources) == 1 and sources[0][0].name and r.name:
g_r.name = "(d%s/d%s)" % (sources[0][0].name, r.name)
if g_r is not None:
......
......@@ -28,8 +28,9 @@ class ConvOp(Op):
#TODO: make the stacksize its own parameter, and make imshp a pair
def __init__(self, imshp, kshp, nkern, bsize, dx, dy, output_mode='valid', unroll_batch=0,
unroll_kern=0,
def __init__(self, imshp, kshp, nkern, bsize, dx, dy, output_mode='valid',
unroll_batch=4,
unroll_kern=4,
imshp_logical=None,
kshp_logical=None,
kshp_logical_top_aligned=True):
......@@ -57,6 +58,10 @@ class ConvOp(Op):
unroll_batch. If >0 will use a version that will unroll the batch loop by the value of the option. By default don't use this version of the code.
unroll_nkern. idem as unroll_batch but unroll the kernel loop.
By default, unroll_batch=4 and unroll_kern=4 are used when possible (currently this is not supported when the logical shape differs from the physical shape), as this gives the best performance in practice. It also means that, for best performance, the batch size and the number of kernels should be multiples of 4. See the article:
Anatomy of High-Performance Matrix Multiplication by Kazushige Goto and Robert A. Van De Geijn, ACM Transactions on Mathematical Software, vol 34, No. 3, article 12, May 2008.
In figure 12, it gives the values mr x nr; those values are the optimum to use for unroll_batch and unroll_kern. For x86_64 computers it is 4x4. Other architectures can have different values (2x4 for x86, 8x8 for Itanium, ...).
"""
imshp = tuple(imshp)
if len(imshp)==2:
......@@ -473,8 +478,8 @@ if(%(filtersflipped)s->nd==3){
img2d = PyArray_Newshape(%(img2d)s,&img2d_shape, PyArray_CORDER);
img2d_arr = (PyArrayObject*)img2d;
if ((img2d_arr->strides[3] != sizeof(%(type)s))
|| (img2d_arr->strides[2] != img2d_arr->dimensions[3]*sizeof(%(type)s))){
if ((img2d_arr->strides[3] != (npy_intp)sizeof(%(type)s))
|| (img2d_arr->strides[2] != img2d_arr->dimensions[3]*(npy_intp)sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d));
Py_DECREF(img2d);
img2d = contig;
......@@ -487,8 +492,8 @@ img2d_arr = (PyArrayObject*)img2d;
filtersflipped = PyArray_Newshape(%(filtersflipped)s,&kerns_shape, PyArray_CORDER);
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if ((filtersflipped_arr->strides[3] != sizeof(%(type)s))
|| (filtersflipped_arr->strides[2] != filtersflipped_arr->dimensions[3]*sizeof(%(type)s))){
if ((filtersflipped_arr->strides[3] != (npy_intp)sizeof(%(type)s))
|| (filtersflipped_arr->strides[2] != filtersflipped_arr->dimensions[3]*(npy_intp)sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)filtersflipped));
Py_DECREF(filtersflipped);
filtersflipped = contig;
......@@ -517,9 +522,8 @@ if ((!%(z)s)
|| (%(z)s->dimensions[3] != dim_zz[1])
)
{
if (%(z)s) Py_DECREF(%(z)s);
{Py_XDECREF(%(z)s);}
npy_intp dims[4] = {0,0,0,0};
if(!dims) %(fail)s;
dims[0]=%(self_bsize)s;
dims[1]=%(self_nkern)s;
dims[2]=dim_zz[0];
......@@ -540,10 +544,10 @@ for(int b=0;b< %(self_bsize)s;b++){
for(int n_kern=0;n_kern<%(self_nkern)s;n_kern++){
//assertions
if (%(z)s->strides[0] != %(z)s->dimensions[1] *%(z)s->dimensions[2] *%(z)s->dimensions[3] * sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[1] != %(z)s->dimensions[2] * %(z)s->dimensions[3] * sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[2] != %(z)s->dimensions[3] * sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[3] != sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[0] != %(z)s->dimensions[1] *%(z)s->dimensions[2] *%(z)s->dimensions[3] * (npy_intp)sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[1] != %(z)s->dimensions[2] * %(z)s->dimensions[3] * (npy_intp)sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[2] != %(z)s->dimensions[3] * (npy_intp)sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[3] != (npy_intp)sizeof(%(type)s)) %(fail)s;
%(type)s * __restrict__ out=(%(type)s *)(PyArray_GETPTR2(%(z)s,b,n_kern));
for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i) out[i] = 0;
......@@ -717,8 +721,8 @@ if (NKERN != kerns_dim[0])
img2d = PyArray_Newshape(%(img2d)s,&img2d_shape, PyArray_CORDER);
img2d_arr = (PyArrayObject*)img2d;
if ((img2d_arr->strides[3] != sizeof(%(type)s))
|| (img2d_arr->strides[2] != img2d_arr->dimensions[3]*sizeof(%(type)s))){
if ((img2d_arr->strides[3] != (npy_intp)sizeof(%(type)s))
|| (img2d_arr->strides[2] != img2d_arr->dimensions[3]*(npy_intp)sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d));
Py_DECREF(img2d);
img2d = contig;
......@@ -746,7 +750,7 @@ if ((!%(z)s)
|| (%(z)s->dimensions[3] != dim_zz[1])
)
{
if (%(z)s) Py_DECREF(%(z)s);
{Py_XDECREF(%(z)s);}
npy_intp dims[4] = {0,0,0,0};
dims[0]=%(self_bsize)s;
dims[1]=%(self_nkern)s;
......@@ -764,7 +768,7 @@ Os[1] = dim_im[1]-dim_ker[1]+1;
// allocate a temporary buffer for storing the inner product of each nth kernel row
// with each row of an image
{
%(type)s * kbuf = (%(type)s *)malloc((Os[0] * NKERN + PyArray_Size((PyObject*)%(filtersflipped)s))* sizeof(%(type)s));
%(type)s * kbuf = (%(type)s *)malloc((Os[0] * NKERN + PyArray_Size((PyObject*)%(filtersflipped)s))* (npy_intp)sizeof(%(type)s));
int kbufstride = NKERN;
%(type)s * myfilters = kbuf + Os[0] * NKERN;
......@@ -809,7 +813,7 @@ for(int b=0;b< %(self_bsize)s;b++){
int imgview_stride = dim_im[1];
int filter_rows_stride =kerns_dim[1]*kerns_dim[2]*kerns_dim[3];
//remember, Fortran wants a column-major interpretation
assert(img2d->strides[3] == sizeof(%(type)s));
assert(img2d->strides[3] == (npy_intp)sizeof(%(type)s));
if (0){
std::cerr << "b " << b << " img_col " << img_col << " filterrow " << filter_row << " stackidx " <<stackidx << "\\n";
......@@ -958,8 +962,8 @@ if(%(filtersflipped)s->nd==3){
img2d = PyArray_Newshape(%(img2d)s,&img2d_shape, PyArray_CORDER);
img2d_arr = (PyArrayObject*)img2d;
if ((img2d_arr->strides[3] != sizeof(%(type)s))
|| (img2d_arr->strides[2] != img2d_arr->dimensions[3]*sizeof(%(type)s))){
if ((img2d_arr->strides[3] != (npy_intp)sizeof(%(type)s))
|| (img2d_arr->strides[2] != img2d_arr->dimensions[3]*(npy_intp)sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d));
Py_DECREF(img2d);
img2d = contig;
......@@ -972,8 +976,8 @@ img2d_arr = (PyArrayObject*)img2d;
filtersflipped = PyArray_Newshape(%(filtersflipped)s,&kerns_shape, PyArray_CORDER);
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if ((filtersflipped_arr->strides[3] != sizeof(%(type)s))
|| (filtersflipped_arr->strides[2] != filtersflipped_arr->dimensions[3]*sizeof(%(type)s))){
if ((filtersflipped_arr->strides[3] != (npy_intp)sizeof(%(type)s))
|| (filtersflipped_arr->strides[2] != filtersflipped_arr->dimensions[3]*(npy_intp)sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)filtersflipped));
Py_DECREF(filtersflipped);
filtersflipped = contig;
......@@ -1002,9 +1006,8 @@ if ((!%(z)s)
|| (%(z)s->dimensions[3] != dim_zz[1])
)
{
if (%(z)s) Py_DECREF(%(z)s);
{Py_XDECREF(%(z)s);}
npy_intp dims[4] = {0,0,0,0};
if(!dims) %(fail)s;
dims[0]=%(self_bsize)s;
dims[1]=%(self_nkern)s;
dims[2]=dim_zz[0];
......@@ -1025,10 +1028,10 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
for(int n_kern=0;n_kern<%(self_nkern)s;n_kern+=%(unroll_ksize)s){
//assertions
if (%(z)s->strides[0] != %(z)s->dimensions[1] *%(z)s->dimensions[2] *%(z)s->dimensions[3] * sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[1] != %(z)s->dimensions[2] * %(z)s->dimensions[3] * sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[2] != %(z)s->dimensions[3] * sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[3] != sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[0] != %(z)s->dimensions[1] *%(z)s->dimensions[2] *%(z)s->dimensions[3] * (npy_intp)sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[1] != %(z)s->dimensions[2] * %(z)s->dimensions[3] * (npy_intp)sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[2] != %(z)s->dimensions[3] * (npy_intp)sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[3] != (npy_intp)sizeof(%(type)s)) %(fail)s;
"""%d
ret+=my_dup2("%(type)s * __restrict__ out%(unroll_iter)s=(%(type)s *)(PyArray_GETPTR2(%(z)s,b+%(unroll_biter)s,n_kern+%(unroll_kiter)s));")
ret+=my_dup("for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i) out%(unroll_iter)s[i] = 0;",unroll_bsize*unroll_ksize)
......
......@@ -819,7 +819,7 @@ class StructuredDotCSC(gof.Op):
|| (%(z)s->dimensions[1] != %(b)s->dimensions[1])
)
{
if (%(z)s) Py_DECREF(%(z)s);
{Py_XDECREF(%(z)s);}
npy_intp dims[] = {0,0};
dims[0] = ((npy_int32 *)%(a_nrows)s->data)[0];
dims[1] = %(b)s->dimensions[1];
......@@ -951,7 +951,7 @@ class StructuredDotCSR(gof.Op):
|| (%(z)s->dimensions[1] != %(b)s->dimensions[1]) //b's columns
)
{
if (%(z)s) Py_DECREF(%(z)s);
{Py_XDECREF(%(z)s);}
npy_intp dims[] = {0,0};
dims[0] = %(a_ptr)s->dimensions[0]-1;
dims[1] = %(b)s->dimensions[1];
......
......@@ -394,15 +394,15 @@ class TensorType(Type):
def c_sync(self, name, sub):
"""Override `CLinkerOp.c_sync` """
return """
Py_XDECREF(py_%(name)s);
{Py_XDECREF(py_%(name)s);}
if (!%(name)s) {
Py_XINCREF(Py_None);
Py_INCREF(Py_None);
py_%(name)s = Py_None;
}
else if ((void*)py_%(name)s != (void*)%(name)s) {
py_%(name)s = (PyObject*)%(name)s;
}
Py_XINCREF(py_%(name)s);
{Py_XINCREF(py_%(name)s);}
""" % locals()
def c_headers(self):
......@@ -2389,20 +2389,24 @@ outer = Outer()
def grad(cost, wrt, g_cost=None, consider_constant=[]):
"""
@type cost: L{Variable}
@type wrt: L{Variable} or list of L{Variable}s.
@type g_cost: L{Variable} broadcastable to size of I{cost}, or None
@param g_cost: an expression for the gradient through cost. The default is
{{{ones_like(cost)}}}
@param consider_constant: a list of expressions not to backpropagate through
@rtype: L{Variable} or list of L{Variable}s (depending upon I{wrt})
@return: symbolic expression of gradient of I{cost} with respect to I{wrt}.
If I{wrt} is a list, then return a list containing the gradient of I{cost} wrt
each element of the list. If an element of I{wrt} is not differentiable
with respect to the output, then a L{TensorConstant} with an appropriate
:type cost: `Variable`
:type wrt: `Variable` or list of `Variable`s.
:type g_cost: `Variable` broadcastable to size of `cost`, or None
:param g_cost: an expression for the gradient through cost. The default is
``ones_like(cost)``.
:param consider_constant: a list of expressions not to backpropagate through
:rtype: `Variable` or list of `Variable`s (depending upon `wrt`)
:return: symbolic expression of gradient of `cost` with respect to `wrt`.
If `wrt` is a list, then return a list containing the gradient of `cost` wrt
each element of the list. If an element of `wrt` is not differentiable
with respect to the output, then a `TensorConstant` with an appropriate
kind of zero is returned.
This function is a wrapper around a the more general function
`theano.gradient.grad_sources_inputs``.
"""
if not isinstance(cost, TensorVariable):
raise TypeError('In tensor.grad(), cost argument should be a TensorVariable.', cost)
......
......@@ -373,7 +373,36 @@ def test_mixeddiv():
# # print g
# # # print g.outputs[0].owner.c_code(['x', 'y', 'z'], ['e'], dict(fail = "FAIL;", id = 0))
# # print gof.OpWiseCLinker(g).make_function()(numpy.ones((5, 5)), numpy.ones((5, 5)), numpy.ones((5, 5)))
def test_const_type_in_mul_canonizer():
    """Regression test: the mul canonizer must treat an integer constant
    (``2 * a * a``) and the equivalent float constant (``2. * a * a``)
    identically, so both compiled graphs compute the same values.
    """
    inp = dmatrix()
    w = dmatrix()
    visb = dvector()
    hidb = dvector()
    betas = dvector()
    a = dvector()

    def sigm(x):
        return 1. / (1 + exp(-x))

    hid = sigm((dot(w, inp) + hidb) * betas)
    # Same expression twice: one with an int constant, one with a float.
    vis_gauss1 = (dot(w.T, hid) + visb) * betas / (2 * a * a)
    vis_gauss2 = (dot(w.T, hid) + visb) * betas / (2. * a * a)

    f1 = function([inp, w, visb, hidb, betas, a], vis_gauss1)
    f2 = function([inp, w, visb, hidb, betas, a], vis_gauss2)

    ival = numpy.random.rand(5, 5)
    wval = numpy.random.rand(5, 5)
    visbval = numpy.random.rand(5)
    hidbval = numpy.random.rand(5)
    betaval = numpy.random.rand(5)
    aval = numpy.random.rand(5)

    args = (ival, wval, visbval, hidbval, betaval, aval)
    assert numpy.allclose(f2(*args), f1(*args))
......
......@@ -9,6 +9,11 @@ from theano import gof
from theano.gradient import *
from theano import gradient
def _grad_sources_inputs(*args):
    """Call grad_sources_inputs with warn_type disabled.

    warn_type was introduced after these tests were written; with it enabled
    the tests complain throughout for nothing.
    """
    # Pass positional args before the keyword: ``f(kw=..., *args)`` is legal
    # but confusing, and would raise if warn_type were ever also supplied
    # positionally through *args.
    return grad_sources_inputs(*args, warn_type=False)
class test_grad_sources_inputs(unittest.TestCase):
def test_retNone1(self):
"""Test that it is not ok to return None from op.grad()"""
......@@ -21,7 +26,7 @@ class test_grad_sources_inputs(unittest.TestCase):
pass
a = retNone().make_node()
try:
grad_sources_inputs([(a.out, 1)], None)
_grad_sources_inputs([(a.out, 1)], None)
except ValueError, e:
self.failUnless(e[0] is gradient._msg_retType)
return
......@@ -36,7 +41,7 @@ class test_grad_sources_inputs(unittest.TestCase):
return [None]
i = gof.generic()
a = retNone().make_node(i)
g = grad_sources_inputs([(a.out, 1)], None)
g = _grad_sources_inputs([(a.out, 1)], None)
self.failUnless(not i in g)
def test_wrong_rval_len1(self):
......@@ -51,10 +56,10 @@ class test_grad_sources_inputs(unittest.TestCase):
i = gof.generic()
j = gof.generic()
a1 = retNone().make_node(i)
g = grad_sources_inputs([(a1.out, 1)], None)
g = _grad_sources_inputs([(a1.out, 1)], None)
a2 = retNone().make_node(i,j)
try:
g = grad_sources_inputs([(a2.out, 1)], None)
g = _grad_sources_inputs([(a2.out, 1)], None)
except ValueError, e:
self.failUnless(e[0] is gradient._msg_badlen)
return
......@@ -74,7 +79,7 @@ class test_grad_sources_inputs(unittest.TestCase):
i = gof.generic()
a1 = retNone(self).make_node(i)
g = grad_sources_inputs([(a1.out, None)], None)
g = _grad_sources_inputs([(a1.out, None)], None)
def test_1in_1out(self):
"""Test grad is called correctly for a 1-to-1 op"""
......@@ -87,7 +92,7 @@ class test_grad_sources_inputs(unittest.TestCase):
def grad(self, (x, ), (gz, )):
return gval,
a1 = O().make_node()
g = grad_sources_inputs([(a1.outputs[0], 1)], None)
g = _grad_sources_inputs([(a1.outputs[0], 1)], None)
self.failUnless(g[a1.inputs[0]] is gval)
def test_1in_Nout(self):
......@@ -101,7 +106,7 @@ class test_grad_sources_inputs(unittest.TestCase):
def grad(self, (x, ), (gz1, gz2)):
return gval,
a1 = O().make_node()
g = grad_sources_inputs([(a1.outputs[0], 1)], None)
g = _grad_sources_inputs([(a1.outputs[0], 1)], None)
self.failUnless(g[a1.inputs[0]] is gval)
def test_Nin_1out(self):
"""Test grad is called correctly for a many-to-1 op"""
......@@ -115,7 +120,7 @@ class test_grad_sources_inputs(unittest.TestCase):
def grad(self, (x0,x1), (gz, )):
return (gval0, gval1)
a1 = O().make_node()
g = grad_sources_inputs([(a1.outputs[0], 1)], None)
g = _grad_sources_inputs([(a1.outputs[0], 1)], None)
self.failUnless(g[a1.inputs[0]] is gval0)
self.failUnless(g[a1.inputs[1]] is gval1)
def test_Nin_Nout(self):
......@@ -130,7 +135,7 @@ class test_grad_sources_inputs(unittest.TestCase):
def grad(self, (x0,x1), (gz0,gz1)):
return gval0, gval1
a1 = O().make_node()
g = grad_sources_inputs([(a1.outputs[0], 1)], None)
g = _grad_sources_inputs([(a1.outputs[0], 1)], None)
self.failUnless(g[a1.inputs[0]] is gval0)
self.failUnless(g[a1.inputs[1]] is gval1)
def test_some_None_ograds(self):
......@@ -145,7 +150,7 @@ class test_grad_sources_inputs(unittest.TestCase):
return [1]
i = gof.generic()
a1 = O(self).make_node(i)
g = grad_sources_inputs([(a1.outputs[0], 1)], None)
g = grad_sources_inputs([(a1.outputs[0], 1)], None, warn_type=False)
self.failUnless(g[i] is 1)
def test_some_None_igrads(self):
......@@ -167,12 +172,12 @@ class test_grad_sources_inputs(unittest.TestCase):
k = gof.generic()
a1 = O(self, True).make_node(i,j)
a2 = O(self, True).make_node(a1.outputs[1], k)
g = grad_sources_inputs([(a2.outputs[0], 1)], None)
g = grad_sources_inputs([(a2.outputs[0], 1)], None, warn_type=False)
self.failUnless(g[i] is 1 and j not in g and k not in g)
a1 = O(self, True).make_node(i,j)
a2 = O(self, True).make_node(k, a1.outputs[1])
g = grad_sources_inputs([(a2.outputs[0], 1)], None)
g = _grad_sources_inputs([(a2.outputs[0], 1)], None)
self.failUnless(g[k] is 1 and i not in g and j not in g)
def test_inputs(self):
......@@ -197,7 +202,7 @@ class test_grad_sources_inputs(unittest.TestCase):
k = gof.generic()
a1 = O(self, True).make_node(i,j)
a2 = O(self, True).make_node(k,a1.outputs[1])
g = grad_sources_inputs([(a2.outputs[0], 1), (a1.outputs[1],4),
g = _grad_sources_inputs([(a2.outputs[0], 1), (a1.outputs[1],4),
(a1.outputs[0], 3), (a1.outputs[0], 3)], a1.outputs)
self.failUnless(g[a2.inputs[0]] == 1)
self.failUnless(g[a2.inputs[1]] == 5)
......@@ -228,7 +233,7 @@ class test_grad_sources_inputs(unittest.TestCase):
k = gof.generic()
a1 = O(self,True).make_node(i,j)
a2 = O(self,True).make_node(k,a1.outputs[1])
g = grad_sources_inputs([(a2.outputs[0], 1), (a1.outputs[1],4),
g = _grad_sources_inputs([(a2.outputs[0], 1), (a1.outputs[1],4),
(a1.outputs[0], 3), (a1.outputs[0], 3)], None)
self.failUnless(g[a2.inputs[0]] == 1)
self.failUnless(g[a2.inputs[1]] == 5)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论