提交 3f650984 authored 作者: gdesjardins's avatar gdesjardins

merge (ps: I hate mercurial)

上级 f49e38f7
...@@ -200,3 +200,9 @@ AddConfigVar('warn.sum_sum_bug', ...@@ -200,3 +200,9 @@ AddConfigVar('warn.sum_sum_bug',
AddConfigVar('warn.sum_div_dimshuffle_bug', AddConfigVar('warn.sum_div_dimshuffle_bug',
"Warn if previous versions of Theano (between rev. 3bd9b789f5e8, 2010-06-16, and cfc6322e5ad4, 2010-08-03) would have given incorrect result. This bug was triggered by sum of division of dimshuffled tensors.", "Warn if previous versions of Theano (between rev. 3bd9b789f5e8, 2010-06-16, and cfc6322e5ad4, 2010-08-03) would have given incorrect result. This bug was triggered by sum of division of dimshuffled tensors.",
BoolParam(default_0_3)) BoolParam(default_0_3))
AddConfigVar('compute_test_value',
"If True, Theano will run each op at graph build time, using Constants, SharedVariables and the tag 'test_value' as inputs to the function. This helps the user track down problems in the graph before it gets optimized.",
EnumStr(True, False, 'warn', 'err'))
"""Apply subclass for use with Tensors that implement shape propagation via variable.tag.shape.

This is not currently used very much.  It appears in some cases, but I am not sure whether it works, or whether it is used by default.
It could help the current system detect problems earlier, when constructing the graph instead of during optimization.
"""
import sys
from theano import gof
def ishape(v):
    """Return a pair ``(has_shape, shape)`` for variable `v`.

    If ``v.tag.shape`` exists, return ``(True, v.tag.shape)``.  Otherwise
    return ``(False, (None,) * v.type.ndim)`` — one unknown dimension per
    dimension of the variable's type.
    """
    try:
        shp = v.tag.shape
    except AttributeError:
        # No shape info attached to this variable's tag.
        return (False, (None,) * v.type.ndim)
    return (True, shp)
class Apply(gof.Apply):
    """An Apply node that propagates shape information at graph-build time.

    If any input variable carries shape information in ``tag.shape`` and the
    Op implements ``infer_shape``, the inferred output shapes are stored on
    each output variable's ``tag.shape``.
    """

    def __init__(self, op, inputs, outputs):
        super(Apply, self).__init__(op, inputs, outputs)
        if not inputs:
            return
        # if any input has any shape info, then propagate it
        try:
            provided, ishapes = zip(*[ishape(i) for i in inputs])
        except AttributeError:
            # i.type.ndim didn't make sense for some i
            return
        # BUG FIX: `provided` is a tuple (from zip), so the original
        # comparison `provided == [False for i in inputs]` compared a tuple
        # to a list and was always False; the early exit below never fired.
        if not any(provided):
            # no input had a tag.shape
            return
        try:
            infer_shape = op.infer_shape
        except AttributeError:
            # op has no infer_shape, that's fine
            return
        try:
            oshapes = infer_shape(self, ishapes)
        except NotImplementedError:
            return
        for o, oshp in zip(outputs, oshapes):
            o.tag.shape = oshp
...@@ -8,9 +8,10 @@ compatible with `gof`'s :doc:`graph` routines. ...@@ -8,9 +8,10 @@ compatible with `gof`'s :doc:`graph` routines.
__docformat__ = "restructuredtext en" __docformat__ = "restructuredtext en"
from .. import config
import graph
import numpy
import utils import utils
from theano import config
class CLinkerObject(object): class CLinkerObject(object):
...@@ -322,6 +323,46 @@ class PureOp(object): ...@@ -322,6 +323,46 @@ class PureOp(object):
""" """
node = self.make_node(*inputs, **kwargs) node = self.make_node(*inputs, **kwargs)
self.add_tag_trace(node) self.add_tag_trace(node)
if config.compute_test_value:
# avoid circular import
from ..compile.sharedvalue import SharedVariable
run_perform = True
# build test input-values
input_vals = []
for ins in inputs:
if isinstance(ins, graph.Constant):
input_vals.append(ins.value)
elif isinstance(ins,numpy.ndarray):
input_vals.append(ins)
elif isinstance(ins,SharedVariable):
input_vals.append(ins.get_value(borrow=True))
elif isinstance(ins,graph.Variable) and hasattr(ins.tag, 'test_value'):
input_vals.append(ins.tag.test_value)
else:
# no test-value was specified, act accordingly
if config.compute_test_value == 'warn':
raise Warning('Cannot compute test value: input %s of Op %s missing default value')
run_perform = False
elif config.compute_test_value == 'err':
raise ValueError('Cannot compute test value: input %s of Op %s missing default value')
else:
# silently skip test
run_perform = False
# if all inputs have test-values, run the actual op
if run_perform:
# compute output value once with test inputs to validate graph
output_storage = [[None] * len(node.outputs)]
node.op.perform(node, input_vals, output_storage)
# add 'test_value' to output tags, so that downstream ops can use these
# numerical values as inputs to their perform method.
for (outval, node_output) in zip(output_storage, node.outputs):
node_output.tag.test_value = outval[0]
if self.default_output is not None: if self.default_output is not None:
return node.outputs[self.default_output] return node.outputs[self.default_output]
else: else:
......
import numpy
import unittest
import theano
from theano import tensor as T
class TestComputeTestValue(unittest.TestCase):
    """Tests for the ``compute_test_value`` configuration flag.

    Each test builds a small graph whose inputs carry test values (via
    ``tag.test_value``, shared variables, constants or raw ndarrays) and
    checks that shape mismatches are caught at graph-construction time.

    Fix: the original tests mutated ``theano.config.compute_test_value``
    without restoring it, so the setting leaked between tests (and into
    any test run after this suite).  setUp/tearDown now save and restore
    the flag so each test is isolated.
    """

    def setUp(self):
        # Save the global flag so tests do not interfere with each other.
        self._backup_compute_test_value = theano.config.compute_test_value

    def tearDown(self):
        theano.config.compute_test_value = self._backup_compute_test_value

    def test_variable_only(self):
        theano.config.compute_test_value = True
        x = T.matrix('x')
        x.tag.test_value = numpy.random.rand(3, 4)
        y = T.matrix('y')
        y.tag.test_value = numpy.random.rand(4, 5)
        # should work
        z = T.dot(x, y)
        # this test should fail: inner dimensions no longer agree
        y.tag.test_value = numpy.random.rand(6, 5)
        self.assertRaises(ValueError, T.dot, x, y)

    def test_compute_flag(self):
        x = T.matrix('x')
        y = T.matrix('y')
        y.tag.test_value = numpy.random.rand(4, 5)
        # should skip computation of test value (x has none, but no error)
        theano.config.compute_test_value = False
        z = T.dot(x, y)
        # should fail one way or another when the flag is set
        theano.config.compute_test_value = 'warn'
        self.assertRaises(Warning, T.dot, x, y)
        theano.config.compute_test_value = 'err'
        self.assertRaises(ValueError, T.dot, x, y)

    def test_string_var(self):
        theano.config.compute_test_value = True
        x = T.matrix('x')
        x.tag.test_value = numpy.random.rand(3, 4)
        y = T.matrix('y')
        y.tag.test_value = numpy.random.rand(4, 5)
        z = theano.shared(numpy.random.rand(5, 6))
        # should work
        out = T.dot(T.dot(x, y), z)

        def f(x, y, z):
            return T.dot(T.dot(x, y), z)
        # this test should fail: shared value no longer matches dot(x, y)
        z.set_value(numpy.random.rand(7, 6))
        self.assertRaises(ValueError, f, x, y, z)

    def test_shared(self):
        theano.config.compute_test_value = True
        x = T.matrix('x')
        x.tag.test_value = numpy.random.rand(3, 4)
        y = theano.shared(numpy.random.rand(4, 6), 'y')
        # should work
        z = T.dot(x, y)
        # this test should fail: shared value reshaped incompatibly
        y.set_value(numpy.random.rand(5, 6))
        self.assertRaises(ValueError, T.dot, x, y)

    def test_ndarray(self):
        theano.config.compute_test_value = True
        x = numpy.random.rand(2, 3)
        y = theano.shared(numpy.random.rand(3, 6), 'y')
        # should work
        z = T.dot(x, y)
        # this test should fail: ndarray input no longer conformable
        x = numpy.random.rand(2, 4)
        self.assertRaises(ValueError, T.dot, x, y)

    def test_constant(self):
        theano.config.compute_test_value = True
        x = T.constant(numpy.random.rand(2, 3))
        y = theano.shared(numpy.random.rand(3, 6), 'y')
        # should work
        z = T.dot(x, y)
        # this test should fail: constant no longer conformable
        x = T.constant(numpy.random.rand(2, 4))
        self.assertRaises(ValueError, T.dot, x, y)
...@@ -2188,7 +2188,7 @@ CudaNdarray_Dot(PyObject* _unused, PyObject* args) ...@@ -2188,7 +2188,7 @@ CudaNdarray_Dot(PyObject* _unused, PyObject* args)
} }
static PyObject * static PyObject *
filter(PyObject* __unsed_self, PyObject *args) // args = (data, broadcastable, strict, storage) filter(PyObject* __unsed_self, PyObject *args) // args = (data, broadcastable, strict)
{ {
/* /*
* TODO: DOC what this function should do in the various cases of * TODO: DOC what this function should do in the various cases of
...@@ -2282,10 +2282,10 @@ filter(PyObject* __unsed_self, PyObject *args) // args = (data, broadcastable, s ...@@ -2282,10 +2282,10 @@ filter(PyObject* __unsed_self, PyObject *args) // args = (data, broadcastable, s
Py_DECREF(rval); Py_DECREF(rval);
rval = NULL; rval = NULL;
} }
Py_DECREF(data);
Py_DECREF(py_data);
Py_DECREF(broadcastable);
} }
Py_DECREF(data);
Py_DECREF(py_data);
Py_DECREF(broadcastable);
return (PyObject*)rval; return (PyObject*)rval;
} }
} }
...@@ -2490,11 +2490,6 @@ CudaNdarray_new_nd(int nd) ...@@ -2490,11 +2490,6 @@ CudaNdarray_new_nd(int nd)
return (PyObject *) rval; return (PyObject *) rval;
} }
/**
* Initialize 'self' as a view of 'base', with memory storage 'data'
*/
int CudaNdarray_set_device_data(CudaNdarray * self, float * data, PyObject * base) int CudaNdarray_set_device_data(CudaNdarray * self, float * data, PyObject * base)
{ {
if (self->data_allocated) if (self->data_allocated)
......
...@@ -26,7 +26,7 @@ typedef float real; ...@@ -26,7 +26,7 @@ typedef float real;
#endif #endif
#ifndef SHARED_SIZE #ifndef SHARED_SIZE
#define SHARED_SIZE (16*1024) #define SHARED_SIZE (16*1024)
#endif #endif
...@@ -48,10 +48,10 @@ static T ceil_intdiv(T a, T b) ...@@ -48,10 +48,10 @@ static T ceil_intdiv(T a, T b)
/** /**
* struct CudaNdarray * struct CudaNdarray
* *
* This is a Python type. * This is a Python type.
* *
*/ */
struct CudaNdarray struct CudaNdarray
{ {
PyObject_HEAD PyObject_HEAD
...@@ -65,46 +65,40 @@ struct CudaNdarray ...@@ -65,46 +65,40 @@ struct CudaNdarray
/* Type-specific fields go here. */ /* Type-specific fields go here. */
//GpuTensorType::VoidTensor * vt; //GpuTensorType::VoidTensor * vt;
int nd; //the number of dimensions of the tensor int nd; //the number of dimensions of the tensor
// Client should acces host_structure via CudaNdarray_HOST_DIMS / CudaNdarray_HOST_STRIDES macros // Client should acces host_structure via CudaNdarray_HOST_DIMS / CudaNdarray_HOST_STRIDES macros
int * host_structure; //dim0, dim1, ... stride0, stride1, ... int * host_structure; //dim0, dim1, ... stride0, stride1, ...
int data_allocated; //the number of bytes allocated for devdata int data_allocated; //the number of bytes allocated for devdata
//device pointers (allocated by cudaMalloc) //device pointers (allocated by cudaMalloc)
int dev_structure_fresh; int dev_structure_fresh;
//dev_structure should be accessed via macros, otherwise may not be synchronized //dev_structure should be accessed via macros, otherwise may not be synchronized
int * dev_structure; //dim0, dim1, ..., stride0, stride1, ... int * dev_structure; //dim0, dim1, ..., stride0, stride1, ...
real* devdata; //pointer to data element [0,..,0]. real* devdata; //pointer to data element [0,..,0].
}; };
/* /*
* Return a CudaNdarray whose 'nd' dimensions are all 0. * Return a CudaNdarray whose 'nd' dimensions are all 0.
*/ */
PyObject * PyObject *
CudaNdarray_New(int nd=-1); CudaNdarray_New(int nd=-1);
/** /**
* Return 1 for a CudaNdarray otw 0 * Return 1 for a CudaNdarray otw 0
*/ */
int int
CudaNdarray_Check(const PyObject * ob); CudaNdarray_Check(const PyObject * ob);
/** /**
* Return 1 for a CudaNdarray otw 0 * Return 1 for a CudaNdarray otw 0
*/ */
int int
CudaNdarray_CheckExact(const PyObject * ob); CudaNdarray_CheckExact(const PyObject * ob);
/**
* Return true for a C-contiguous CudaNdarray, else false
*/
bool
CudaNdarray_is_c_contiguous(const CudaNdarray * self);
/**** /****
* Returns the number of elements necessary in host_structure and dev_structure for a given number of dimensions. * Returns the number of elements necessary in host_structure and dev_structure for a given number of dimensions.
*/ */
int int
cnda_structure_size(int nd) cnda_structure_size(int nd)
{ {
// dim0, dim1, ... // dim0, dim1, ...
...@@ -113,23 +107,23 @@ cnda_structure_size(int nd) ...@@ -113,23 +107,23 @@ cnda_structure_size(int nd)
return nd + nd + nd; return nd + nd + nd;
} }
const int * const int *
CudaNdarray_HOST_DIMS(const CudaNdarray * self) CudaNdarray_HOST_DIMS(const CudaNdarray * self)
{ {
return self->host_structure; return self->host_structure;
} }
const int * const int *
CudaNdarray_HOST_STRIDES(const CudaNdarray * self) CudaNdarray_HOST_STRIDES(const CudaNdarray * self)
{ {
return self->host_structure + self->nd; return self->host_structure + self->nd;
} }
const int * const int *
CudaNdarray_HOST_LOG2DIMS(const CudaNdarray * self) CudaNdarray_HOST_LOG2DIMS(const CudaNdarray * self)
{ {
return self->host_structure + 2*self->nd; return self->host_structure + 2*self->nd;
} }
void void
cnda_mark_dev_structure_dirty(CudaNdarray * self) cnda_mark_dev_structure_dirty(CudaNdarray * self)
{ {
self->dev_structure_fresh = 0; self->dev_structure_fresh = 0;
...@@ -196,7 +190,7 @@ CudaNdarray_Equal(CudaNdarray *cnda1, CudaNdarray *cnda2) ...@@ -196,7 +190,7 @@ CudaNdarray_Equal(CudaNdarray *cnda1, CudaNdarray *cnda2)
* *
* Does not sync structure to host. * Does not sync structure to host.
*/ */
void void
CudaNdarray_set_dim(CudaNdarray * self, int idx, int d) CudaNdarray_set_dim(CudaNdarray * self, int idx, int d)
{ {
if ((idx >= self->nd) || (idx < 0) || (d < 0)) if ((idx >= self->nd) || (idx < 0) || (d < 0))
...@@ -212,7 +206,7 @@ CudaNdarray_set_dim(CudaNdarray * self, int idx, int d) ...@@ -212,7 +206,7 @@ CudaNdarray_set_dim(CudaNdarray * self, int idx, int d)
cnda_mark_dev_structure_dirty(self); cnda_mark_dev_structure_dirty(self);
} }
} }
void void
CudaNdarray_set_stride(CudaNdarray * self, int idx, int s) CudaNdarray_set_stride(CudaNdarray * self, int idx, int s)
{ {
if ((idx >= self->nd) || (idx < 0)) if ((idx >= self->nd) || (idx < 0))
...@@ -231,7 +225,7 @@ CudaNdarray_set_stride(CudaNdarray * self, int idx, int s) ...@@ -231,7 +225,7 @@ CudaNdarray_set_stride(CudaNdarray * self, int idx, int s)
* *
* This means: recalculate the log2dims and transfer structure to the card * This means: recalculate the log2dims and transfer structure to the card
*/ */
int int
cnda_copy_structure_to_device(CudaNdarray * self) cnda_copy_structure_to_device(CudaNdarray * self)
{ {
cublasSetVector(cnda_structure_size(self->nd), sizeof(int), self->host_structure, 1, self->dev_structure, 1); cublasSetVector(cnda_structure_size(self->nd), sizeof(int), self->host_structure, 1, self->dev_structure, 1);
...@@ -245,7 +239,7 @@ cnda_copy_structure_to_device(CudaNdarray * self) ...@@ -245,7 +239,7 @@ cnda_copy_structure_to_device(CudaNdarray * self)
return 0; return 0;
} }
const int * const int *
CudaNdarray_DEV_DIMS(CudaNdarray * self) CudaNdarray_DEV_DIMS(CudaNdarray * self)
{ {
if (!self->dev_structure_fresh) if (!self->dev_structure_fresh)
...@@ -255,7 +249,7 @@ CudaNdarray_DEV_DIMS(CudaNdarray * self) ...@@ -255,7 +249,7 @@ CudaNdarray_DEV_DIMS(CudaNdarray * self)
} }
return self->dev_structure; return self->dev_structure;
} }
const int * const int *
CudaNdarray_DEV_STRIDES(CudaNdarray * self) CudaNdarray_DEV_STRIDES(CudaNdarray * self)
{ {
if (!self->dev_structure_fresh) if (!self->dev_structure_fresh)
...@@ -265,7 +259,7 @@ CudaNdarray_DEV_STRIDES(CudaNdarray * self) ...@@ -265,7 +259,7 @@ CudaNdarray_DEV_STRIDES(CudaNdarray * self)
} }
return self->dev_structure + self->nd; return self->dev_structure + self->nd;
} }
const int * const int *
CudaNdarray_DEV_LOG2DIMS(CudaNdarray * self) CudaNdarray_DEV_LOG2DIMS(CudaNdarray * self)
{ {
if (!self->dev_structure_fresh) if (!self->dev_structure_fresh)
...@@ -275,7 +269,7 @@ CudaNdarray_DEV_LOG2DIMS(CudaNdarray * self) ...@@ -275,7 +269,7 @@ CudaNdarray_DEV_LOG2DIMS(CudaNdarray * self)
} }
return self->dev_structure + 2*self->nd; return self->dev_structure + 2*self->nd;
} }
float * float *
CudaNdarray_DEV_DATA(const CudaNdarray * self) CudaNdarray_DEV_DATA(const CudaNdarray * self)
{ {
return self->devdata; return self->devdata;
...@@ -284,7 +278,7 @@ CudaNdarray_DEV_DATA(const CudaNdarray * self) ...@@ -284,7 +278,7 @@ CudaNdarray_DEV_DATA(const CudaNdarray * self)
/** /**
* Return the number of elements in the ndarray (product of the dimensions) * Return the number of elements in the ndarray (product of the dimensions)
*/ */
int int
CudaNdarray_SIZE(const CudaNdarray *self) CudaNdarray_SIZE(const CudaNdarray *self)
{ {
if (self->nd == -1) return 0; if (self->nd == -1) return 0;
...@@ -295,7 +289,7 @@ CudaNdarray_SIZE(const CudaNdarray *self) ...@@ -295,7 +289,7 @@ CudaNdarray_SIZE(const CudaNdarray *self)
} }
return size; return size;
} }
static PyObject * static PyObject *
CudaNdarray_SIZE_Object(const CudaNdarray *self, void *closure) CudaNdarray_SIZE_Object(const CudaNdarray *self, void *closure)
{ {
return PyInt_FromLong(CudaNdarray_SIZE(self)); return PyInt_FromLong(CudaNdarray_SIZE(self));
...@@ -326,7 +320,7 @@ int CudaNdarray_set_nd(CudaNdarray * self, const int nd) ...@@ -326,7 +320,7 @@ int CudaNdarray_set_nd(CudaNdarray * self, const int nd)
} }
self->dev_structure = NULL; self->dev_structure = NULL;
} }
if (self->host_structure) if (self->host_structure)
{ {
free(self->host_structure); free(self->host_structure);
self->host_structure = NULL; self->host_structure = NULL;
...@@ -392,41 +386,29 @@ int CudaNdarray_alloc_contiguous(CudaNdarray *self, const int nd, const inttype ...@@ -392,41 +386,29 @@ int CudaNdarray_alloc_contiguous(CudaNdarray *self, const int nd, const inttype
size = size * dim[i]; size = size * dim[i];
} }
if (CudaNdarray_is_c_contiguous(self) && (self->data_allocated == size)) if (self->data_allocated != size)
{
return 0;
}
// The structure of self will be reused with newly allocated memory.
// If self was a view, we should remove the reference to its base.
// (If base was already NULL, the following has no effect.)
Py_XDECREF(self->base);
self->base = NULL;
// If self is a view, do not try to free its memory
if (self->data_allocated && device_free(self->devdata))
{
self->devdata = NULL;
self->data_allocated = 0;
return -1;
}
assert(size>0);
self->devdata = (float*)device_malloc(size*sizeof(real));
if (!self->devdata)
{ {
CudaNdarray_set_nd(self,-1); if (device_free(self->devdata))
self->data_allocated = 0; {
self->devdata = 0; // Does this ever happen?? Do we need to set data_allocated or devdata to 0?
return -1; return -1;
}
assert(size>0);
self->devdata = (float*)device_malloc(size*sizeof(real));
if (!self->devdata)
{
CudaNdarray_set_nd(self,-1);
self->data_allocated = 0;
self->devdata = 0;
return -1;
}
if (0)
fprintf(stderr,
"Allocated devdata %p (self=%p)\n",
self->devdata,
self);
self->data_allocated = size;
} }
if (0)
fprintf(stderr,
"Allocated devdata %p (self=%p)\n",
self->devdata,
self);
self->data_allocated = size;
return 0; return 0;
} }
...@@ -434,7 +416,7 @@ int CudaNdarray_alloc_contiguous(CudaNdarray *self, const int nd, const inttype ...@@ -434,7 +416,7 @@ int CudaNdarray_alloc_contiguous(CudaNdarray *self, const int nd, const inttype
* Return a CudaNdarray whose 'nd' dimensions are set to dims, and allocated. * Return a CudaNdarray whose 'nd' dimensions are set to dims, and allocated.
*/ */
template<typename inttype> template<typename inttype>
PyObject * PyObject *
CudaNdarray_NewDims(int nd, const inttype * dims) CudaNdarray_NewDims(int nd, const inttype * dims)
{ {
CudaNdarray * rval = (CudaNdarray*)CudaNdarray_New(); CudaNdarray * rval = (CudaNdarray*)CudaNdarray_New();
...@@ -458,7 +440,7 @@ CudaNdarray_NewDims(int nd, const inttype * dims) ...@@ -458,7 +440,7 @@ CudaNdarray_NewDims(int nd, const inttype * dims)
int CudaNdarray_set_device_data(CudaNdarray * self, float * data, PyObject * base); int CudaNdarray_set_device_data(CudaNdarray * self, float * data, PyObject * base);
int CudaNdarray_set_device_data(CudaNdarray * self, float * data, CudaNdarray * base) int CudaNdarray_set_device_data(CudaNdarray * self, float * data, CudaNdarray * base)
{ {
return CudaNdarray_set_device_data(self, data, (PyObject *) base); return CudaNdarray_set_device_data(self, data, (PyObject *) base);
} }
/** /**
...@@ -493,10 +475,10 @@ int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, CudaNdarray * other, boo ...@@ -493,10 +475,10 @@ int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, CudaNdarray * other, boo
/** /**
* Transfer the contents of CudaNdarray `self` to a new numpy ndarray. * Transfer the contents of CudaNdarray `self` to a new numpy ndarray.
*/ */
PyObject * PyObject *
CudaNdarray_CreateArrayObj(CudaNdarray * self); CudaNdarray_CreateArrayObj(CudaNdarray * self);
PyObject * PyObject *
CudaNdarray_ZEROS(int n, int * dims); CudaNdarray_ZEROS(int n, int * dims);
/** /**
...@@ -517,7 +499,7 @@ int CudaNdarray_dimshuffle(CudaNdarray * self, unsigned int len, const int * pat ...@@ -517,7 +499,7 @@ int CudaNdarray_dimshuffle(CudaNdarray * self, unsigned int len, const int * pat
void fprint_CudaNdarray(FILE * fd, const CudaNdarray *self) void fprint_CudaNdarray(FILE * fd, const CudaNdarray *self)
{ {
fprintf(fd, "CudaNdarray <%p, %p> nd=%i dev_structure_fresh=%d data_allocated=%d\n", fprintf(fd, "CudaNdarray <%p, %p> nd=%i dev_structure_fresh=%d data_allocated=%d\n",
self, self->devdata, self->nd, self->dev_structure_fresh, self->data_allocated); self, self->devdata, self->nd, self->dev_structure_fresh, self->data_allocated);
fprintf(fd, "\tHOST_DIMS: "); fprintf(fd, "\tHOST_DIMS: ");
for (int i = 0; i < self->nd; ++i) for (int i = 0; i < self->nd; ++i)
{ {
...@@ -528,23 +510,23 @@ void fprint_CudaNdarray(FILE * fd, const CudaNdarray *self) ...@@ -528,23 +510,23 @@ void fprint_CudaNdarray(FILE * fd, const CudaNdarray *self)
{ {
fprintf(fd, "%i\t", CudaNdarray_HOST_STRIDES(self)[i]); fprintf(fd, "%i\t", CudaNdarray_HOST_STRIDES(self)[i]);
} }
int data=0; int data=0;
fprintf(fd, "\n\tDEV_DIMS: "); fprintf(fd, "\n\tDEV_DIMS: ");
for (int i = 0; i < self->nd; ++i) for (int i = 0; i < self->nd; ++i)
{ {
cublasGetVector(1, sizeof(int), cublasGetVector(1, sizeof(int),
self->dev_structure+i, 1, self->dev_structure+i, 1,
&data, 1); &data, 1);
fprintf(fd, "%i\t", data); fprintf(fd, "%i\t", data);
} }
fprintf(fd, "\n\tDEV_STRIDES: "); fprintf(fd, "\n\tDEV_STRIDES: ");
for (int i = 0; i < self->nd; ++i) for (int i = 0; i < self->nd; ++i)
{ {
cublasGetVector(1, sizeof(int), cublasGetVector(1, sizeof(int),
self->dev_structure + self->nd+i, 1, self->dev_structure + self->nd+i, 1,
&data, 1); &data, 1);
fprintf(fd, "%i \t", data); fprintf(fd, "%i \t", data);
} }
fprintf(fd, "\n"); fprintf(fd, "\n");
} }
......
...@@ -12,7 +12,8 @@ import numpy, theano ...@@ -12,7 +12,8 @@ import numpy, theano
#from copy import copy as python_copy #from copy import copy as python_copy
from theano import gof, shared from theano import gof, shared
from theano.gof import Apply, Constant, Op, Type, Value, Variable from theano.gof import Variable, Op, Type, Constant, Value
from theano.gof.apply_shape import Apply
from theano import gradient from theano import gradient
...@@ -286,6 +287,7 @@ def constant_or_value(x, rtype, name=None, ndim=None, dtype=None): ...@@ -286,6 +287,7 @@ def constant_or_value(x, rtype, name=None, ndim=None, dtype=None):
TensorType(dtype = x_.dtype, broadcastable = bcastable), TensorType(dtype = x_.dtype, broadcastable = bcastable),
x_.copy(), x_.copy(),
name=name) name=name)
rval.tag.shape = x_.shape
return rval return rval
else: else:
# leave the shape out of the type # leave the shape out of the type
...@@ -2976,6 +2978,15 @@ class SubtensorPrinter: ...@@ -2976,6 +2978,15 @@ class SubtensorPrinter:
pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, Subtensor), SubtensorPrinter()) pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, Subtensor), SubtensorPrinter())
def setsubtensor(x, y, idx_list, inplace=False):
    """Deprecated alias: overwrite ``x[idx_list]`` with ``y``.

    Use :func:`set_subtensor` instead.  A deprecation notice is written to
    stderr on every call.
    """
    sys.stderr.write("tensor.setsubtensor is deprecated - please use set_subtensor\n")
    op = IncSubtensor(idx_list, inplace, set_instead_of_inc=True)
    symbolic_idx = Subtensor.collapse(
            idx_list, lambda entry: isinstance(entry, Variable))
    return op(x, y, *symbolic_idx)
def incsubtensor(x, y, idx_list, inplace=False):
    """Deprecated alias: increment ``x[idx_list]`` by ``y``.

    Use :func:`inc_subtensor` instead.  A deprecation notice is written to
    stderr on every call.
    """
    sys.stderr.write("tensor.incsubtensor is deprecated - please use inc_subtensor\n")
    op = IncSubtensor(idx_list, inplace, set_instead_of_inc=False)
    symbolic_idx = Subtensor.collapse(
            idx_list, lambda entry: isinstance(entry, Variable))
    return op(x, y, *symbolic_idx)
def set_subtensor(x, y, inplace=False): def set_subtensor(x, y, inplace=False):
"""Return x with the given subtensor overwritten by y. """Return x with the given subtensor overwritten by y.
...@@ -3499,12 +3510,25 @@ class Join(Op): ...@@ -3499,12 +3510,25 @@ class Join(Op):
def infer_shape(self, node, ishapes): def infer_shape(self, node, ishapes):
# ishapes[0] contains the size of the axis on which we join # Join op should get at least two inputs to join
# Join op should get at least one input to join
assert len(ishapes) > 1 assert len(ishapes) > 1
# Not sure this is needed anymore :( ... basically the apply_shape
# version of the apply node (i.e. the one defined in
# gof/apply_shape) calls infer_shape methods, passing None for unknown
# inputs.  It can handle NotImplementedError, so for now I just raise
# that whenever I get a None.  Should we just remove gof/apply_shape
# if it is deprecated?
if ishapes[1] is None:
raise NotImplementedError
n_dim = len(ishapes[1]) n_dim = len(ishapes[1])
for shape in ishapes[1:]: for shape in ishapes[1:]:
assert shape is not None if shape is None:
raise NotImplementedError
for shape_i in shape:
if shape_i is None:
raise NotImplementedError
# at this point the inputs have been broadcasted so they should
# all have the same shape
assert len(shape) == n_dim assert len(shape) == n_dim
out_shapes = [] out_shapes = []
...@@ -3822,6 +3846,9 @@ def reshape(x, newshape, ndim=None, name=None): ...@@ -3822,6 +3846,9 @@ def reshape(x, newshape, ndim=None, name=None):
ndim = get_vector_length(newshape) ndim = get_vector_length(newshape)
op = Reshape(ndim, name) op = Reshape(ndim, name)
rval = op(x, newshape) rval = op(x, newshape)
if isinstance(newshape, (list, tuple)):
rval.tag.shape = newshape
return rval return rval
class Flatten(Op): class Flatten(Op):
......
...@@ -6,13 +6,16 @@ import numpy.distutils ...@@ -6,13 +6,16 @@ import numpy.distutils
from theano.configparser import config, AddConfigVar, StrParam from theano.configparser import config, AddConfigVar, StrParam
from theano.gof import (utils, Op, view_roots, PatternSub, DestroyHandler, from theano.gof import (utils, Op, view_roots, PatternSub, DestroyHandler,
SeqOptimizer, local_optimizer, Optimizer, LocalOptimizer, OpKeyOptimizer, SeqOptimizer, local_optimizer, Optimizer, LocalOptimizer, OpKeyOptimizer,
InconsistencyError, toolbox, SequenceDB, EquilibriumOptimizer, Apply) InconsistencyError, toolbox, SequenceDB, EquilibriumOptimizer)
from theano.printing import pprint, FunctionPrinter, debugprint from theano.printing import pprint, FunctionPrinter, debugprint
from theano.compile.mode import optdb from theano.compile.mode import optdb
from theano.gof.python25 import all, any from theano.gof.python25 import all, any
import theano.scalar import theano.scalar
import basic as T import basic as T
from theano.gof.apply_shape import Apply
#NB: this clobbers the builtin 'compile' symbol #NB: this clobbers the builtin 'compile' symbol
from theano import compile #to register the optimizer built by this file from theano import compile #to register the optimizer built by this file
......
...@@ -5,11 +5,12 @@ import numpy ...@@ -5,11 +5,12 @@ import numpy
import elemwise_cgen as cgen import elemwise_cgen as cgen
import theano import theano
from theano import gof from theano import gof
from theano.gof import Apply, Op from theano.gof import Op
from theano import scalar from theano import scalar
from theano.scalar import Scalar from theano.scalar import Scalar
from theano.printing import pprint from theano.printing import pprint
from theano.gof.python25 import all, any from theano.gof.python25 import all, any
from theano.gof.apply_shape import Apply
# tensor depends on elemwise to provide definitions for several ops # tensor depends on elemwise to provide definitions for several ops
......
...@@ -18,7 +18,7 @@ import theano ...@@ -18,7 +18,7 @@ import theano
from theano.tensor import (as_tensor_variable, blas, get_constant_value, from theano.tensor import (as_tensor_variable, blas, get_constant_value,
patternbroadcast) patternbroadcast)
from theano import Op, config from theano import Op, config
from theano.gof import Apply from theano.gof.apply_shape import Apply
from theano.gof.python25 import any from theano.gof.python25 import any
imported_scipy_signal = False imported_scipy_signal = False
......
...@@ -11,7 +11,7 @@ from theano.tensor import basic as tensor ...@@ -11,7 +11,7 @@ from theano.tensor import basic as tensor
from theano.tensor import elemwise, dmatrix, fmatrix, dvector, fvector from theano.tensor import elemwise, dmatrix, fmatrix, dvector, fvector
from theano.tensor import opt from theano.tensor import opt
from theano.compile import optdb from theano.compile import optdb
from theano.gof import Apply from theano.gof.apply_shape import Apply
from theano.tensor.nnet.sigm import sigmoid, softplus from theano.tensor.nnet.sigm import sigmoid, softplus
......
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
import logging import logging
_logger = logging.getLogger('theano.tensor.opt') _logger = logging.getLogger('theano.tensor.opt')
import copy
import operator import operator
import itertools import itertools
import sys import sys
...@@ -572,6 +573,14 @@ class ShapeFeature(object): ...@@ -572,6 +573,14 @@ class ShapeFeature(object):
if hasattr(r.type,"broadcastable") and r.type.broadcastable[i]: if hasattr(r.type,"broadcastable") and r.type.broadcastable[i]:
return self.lscalar_one return self.lscalar_one
# NOTE: This may cause problems because the shape is not asserted;
# there is an equivalent mechanism to do this, namely
# specify_shape, which one should use instead
# If user provided size
#elif ( hasattr(r.tag,'shape') and
# r.tag.shape is not None and
# r.tag.shape[i] is not None):
# return T.constant(copy.copy(r.tag.shape[i]),dtype='int64')
else: else:
return Shape_i(i).make_node(r).outputs[0] return Shape_i(i).make_node(r).outputs[0]
...@@ -1084,6 +1093,7 @@ def local_alloc_elemwise(node): ...@@ -1084,6 +1093,7 @@ def local_alloc_elemwise(node):
return [node.op(*new)] return [node.op(*new)]
#TODO, global optimizer that lift the assert to the beginning of the graph. #TODO, global optimizer that lift the assert to the beginning of the graph.
#TODO, var.tag.shape to propagate the shape and lower the overhead of this op
#TODO, when all inputs can be optimized do all except one #TODO, when all inputs can be optimized do all except one
theano.configparser.AddConfigVar('experimental.local_alloc_elemwise', theano.configparser.AddConfigVar('experimental.local_alloc_elemwise',
...@@ -2731,8 +2741,14 @@ register_specialize(local_mul_specialize) ...@@ -2731,8 +2741,14 @@ register_specialize(local_mul_specialize)
@gof.local_optimizer([T.add]) @gof.local_optimizer([T.add])
def local_add_specialize(node): def local_add_specialize(node):
def fill_chain(v): def fill_chain(v):
# Not sure why this happens... but I did not have the time to look
# into it; it probably has something to do with the dtype I am
# providing in the tag.shape of my variable
out = _fill_chain(v, node.inputs) out = _fill_chain(v, node.inputs)
return out if out[0].dtype != node.outputs[0].dtype:
return [T.cast(out[0], dtype = node.outputs[0].dtype)]
else:
return out
#here, we are past the point of canonicalization, so we don't want to put in un-necessary fills. #here, we are past the point of canonicalization, so we don't want to put in un-necessary fills.
if node.op == T.add: if node.op == T.add:
......
import numpy
import unittest
from theano.tests import unittest_tools as utt
import theano
import theano.tensor as T
class Test_incsubtensor(unittest.TestCase):
    """Partial testing of the deprecated incsubtensor/setsubtensor wrappers.

    What could be tested:
    - increment vs set
    - thing incremented: scalar, vector, matrix,
    - increment/set: constant, scalar, vector, matrix
    - indices: scalar vs slice, constant vs variable, out of bound, ...
    - inplace
    """

    def setUp(self):
        utt.seed_rng()

    def test_simple_ok(self):
        """Increments or sets part of a tensor by a scalar using a full
        slice and a partial slice whose end is a symbolic scalar.
        """
        a = T.dmatrix()
        increment = T.dscalar()
        sl1 = slice(None)
        sl2_end = T.lscalar()
        sl2 = slice(sl2_end)
        for do_set in (False, True):
            # Build either the set- or the increment- variant of the op.
            if do_set:
                sym_result = T.setsubtensor(a, increment, [sl1, sl2])
            else:
                sym_result = T.incsubtensor(a, increment, [sl1, sl2])
            fn = theano.function([a, increment, sl2_end], sym_result)

            val_a = numpy.ones((5, 5))
            val_inc = 2.3
            val_sl2_end = 2

            got = fn(val_a, val_inc, val_sl2_end)
            want = numpy.copy(val_a)
            if do_set:
                want[:, :val_sl2_end] = val_inc
            else:
                want[:, :val_sl2_end] += val_inc
            self.assertTrue(numpy.array_equal(got, want))
        return

    def test_grad(self):
        a = T.dvector()
        b = T.dvector()

        def inc_slice(*s):
            # Bind the index tuple so verify_grad only sees numeric args.
            def just_numeric_args(a, b):
                return T.incsubtensor(a, b, s)
            return just_numeric_args

        # vector
        utt.verify_grad(inc_slice(slice(2, 4, None)),
                        (numpy.asarray([0, 1, 2, 3, 4, 5.]),
                         numpy.asarray([9, 9.]),))
        # matrix
        utt.verify_grad(inc_slice(slice(1, 2, None), slice(None, None, None)),
                        (numpy.asarray([[0, 1], [2, 3], [4, 5.]]),
                         numpy.asarray([[9, 9.]]),))
        # single element
        utt.verify_grad(inc_slice(2, 1),
                        (numpy.asarray([[0, 1], [2, 3], [4, 5.]]),
                         numpy.asarray(9.),))
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论