merge

7982862b · James Bergstra · 69860a07 · 42555fbd · 7982862b · 7982862b
--- a/README.1st
+++ b/README.1st
 THEANO
 Documentation and related material are in Trac:
-   http://lgcm:8000/testenv/wiki/WikiStart
+   http://lgcm.iro.umontreal.ca:8000/theano/wiki/WikiStart
 The lisa twiki is deprecated for documenting Theano.

--- a/__init__.py
+++ b/__init__.py
 import gof
-import base_tensor
 import tensor
 import sparse
 import compile
 import gradient
 import opt
-from base_tensor import *
 from tensor import *
 from compile import *
 from opt import *

--- a/_test_base_tensor.py
+++ b/_test_base_tensor.py
-from base_tensor import *
-import unittest
-from copy import copy
-from compile import Function
-import gof
-def _tensor(data, broadcastable=None, name=None):
-    """Return a BaseTensor containing given data"""
-    data = numpy.asarray(data)
-    if broadcastable is None:
-        broadcastable = [s==1 for s in data.shape]
-    elif broadcastable in [0, 1]:
-        broadcastable = [broadcastable] *  len(data.shape)
-    rval = BaseTensor(data.dtype, broadcastable, name)
-    rval.data = data # will raise if broadcastable was mis-specified
-    return rval
-class T_tensor(unittest.TestCase):
-    def test0(self): # allocate from a scalar float
-        t = _tensor(1.0)
-        self.failUnless(isinstance(t, BaseTensor))
-        self.failUnless(t.dtype == 'float64')
-        self.failUnless(t.broadcastable == ())
-        self.failUnless(t.role == None)
-        self.failUnless(isinstance(t.data, numpy.ndarray))
-        self.failUnless(str(t.data.dtype) == 'float64')
-        self.failUnless(t.data == 1.0)
-    def test0_int(self): # allocate from a scalar float
-        t = _tensor(1)
-        self.failUnless(isinstance(t, BaseTensor))
-        self.failUnless(t.dtype == 'int64' or t.dtype == 'int32')
-    def test1(self): # allocate from a vector of ints, not broadcastable
-        t = _tensor(numpy.ones(5,dtype='int32'))
-        self.failUnless(isinstance(t, BaseTensor))
-        self.failUnless(t.dtype == 'int32')
-        self.failUnless(t.broadcastable == (0,))
-        self.failUnless(isinstance(t.data, numpy.ndarray))
-        self.failUnless(str(t.data.dtype) == 'int32')
-    def test2(self): # allocate from a column matrix of complex with name
-        t = _tensor(numpy.ones((5,1),dtype='complex64'),name='bart')
-        self.failUnless(isinstance(t, BaseTensor))
-        self.failUnless(t.dtype == 'complex64')
-        self.failUnless(t.broadcastable == (0,1))
-        self.failUnless(isinstance(t.data, numpy.ndarray))
-        self.failUnless(t.name == 'bart')
-    def test2b(self): # allocate from a column matrix, not broadcastable
-        t = _tensor(numpy.ones((5,1),dtype='complex64'),broadcastable=0)
-        self.failUnless(isinstance(t, BaseTensor))
-        self.failUnless(t.dtype == 'complex64')
-        self.failUnless(t.broadcastable == (0,0))
-        self.failUnless(isinstance(t.data, numpy.ndarray))
-        f = Function([t], [t], linker_cls=gof.CLinker)
-        self.failUnless(numpy.all(t.data == f(t.data)))
-    def test_data_normal(self): #test that assigning to .data works when it should
-        t = _tensor(numpy.ones((5,1),dtype='complex64'), broadcastable=0)
-        o27 = numpy.ones((2,7), dtype='complex64')
-        t.data = o27
-        lst = t._data
-        self.failUnless(t.data.shape == (2,7))
-        self.failUnless(t.data is o27)
-        self.failUnless(t._data is lst)
-    def test_data_badrank0(self):
-        t = _tensor(numpy.ones((5,1),dtype='complex64'), broadcastable=0)
-        try:
-            t.data = numpy.ones((2,7,1))
-            self.fail()
-        except ValueError, e:
-            self.failUnless(e[0] is BaseTensor.filter.E_rank)
-        try:
-            t.data = numpy.ones(1)
-            self.fail()
-        except ValueError, e:
-            self.failUnless(e[0] is BaseTensor.filter.E_rank)
-    def test_data_badrank1(self):
-        t = _tensor(numpy.ones((1,1),dtype='complex64'), broadcastable=1)
-        try:
-            t.data = numpy.ones((1,1,1))
-            self.fail()
-        except ValueError, e:
-            self.failUnless(e[0] is BaseTensor.filter.E_rank)
-        try:
-            t.data = numpy.ones(1)
-            self.fail()
-        except ValueError, e:
-            self.failUnless(e[0] is BaseTensor.filter.E_rank)
-    def test_data_badshape0(self):
-        t = _tensor(numpy.ones((1,1),dtype='complex64'), broadcastable=1)
-        try:
-            t.data = numpy.ones((1,2))
-            self.fail()
-        except ValueError, e:
-            self.failUnless(e[0] is BaseTensor.filter.E_shape)
-        try:
-            t.data = numpy.ones((0,1))
-            self.fail()
-        except ValueError, e:
-            self.failUnless(e[0] is BaseTensor.filter.E_shape)
-    def test_cast0(self):
-        t = BaseTensor('float32', [0])
-        t.data = numpy.random.rand(4) > 0.5
-        self.failUnless(str(t.data.dtype) == t.dtype)
-class T_stdlib(unittest.TestCase):
-    def test0(self):
-        t = _tensor(1.0)
-        tt = t.clone(False)
-        self.failUnless(t.dtype == tt.dtype)
-        self.failUnless(t.broadcastable is tt.broadcastable)
-        self.failUnless(tt.data is None)
-        self.failUnless(t.data == 1.0)
-    def test0b(self):
-        t = _tensor(1.0)
-        tt = t.clone()
-        self.failUnless(t.dtype == tt.dtype)
-        self.failUnless(t.broadcastable is tt.broadcastable)
-        self.failUnless(tt.data is None)
-        self.failUnless(t.data == 1.0)
-    def test1(self):
-        t = _tensor(1.0)
-        tt = t.clone(True)
-        self.failUnless(t.dtype == tt.dtype)
-        self.failUnless(t.broadcastable is tt.broadcastable)
-        self.failUnless(tt.data == 1.0)
-        self.failUnless(t.data == 1.0)
-        self.failUnless(t.data is not tt.data)
-    def test1b(self):
-        t = _tensor(1.0)
-        tt = copy(t)
-        self.failUnless(t.dtype == tt.dtype)
-        self.failUnless(t.broadcastable is tt.broadcastable)
-        self.failUnless(tt.data == 1.0)
-        self.failUnless(t.data == 1.0)
-        self.failUnless(t.data is not tt.data)
-if __name__ == '__main__':
-    unittest.main()
--- a/_test_tensor.py
+++ b/_test_tensor.py
@@ -1280,5 +1280,142 @@ class t_gemm(unittest.TestCase):
        self.fail()
+def _tensor(data, broadcastable=None, name=None):
+    """Return a Tensor containing given data"""
+    data = numpy.asarray(data)
+    if broadcastable is None:
+        broadcastable = [s==1 for s in data.shape]
+    elif broadcastable in [0, 1]:
+        broadcastable = [broadcastable] *  len(data.shape)
+    rval = Tensor(data.dtype, broadcastable, name)
+    rval.data = data # will raise if broadcastable was mis-specified
+    return rval
+class T_tensor(unittest.TestCase):
+    def test0(self): # allocate from a scalar float
+        t = _tensor(1.0)
+        self.failUnless(isinstance(t, Tensor))
+        self.failUnless(t.dtype == 'float64')
+        self.failUnless(t.broadcastable == ())
+        self.failUnless(t.role == None)
+        self.failUnless(isinstance(t.data, numpy.ndarray))
+        self.failUnless(str(t.data.dtype) == 'float64')
+        self.failUnless(t.data == 1.0)
+    def test0_int(self): # allocate from a scalar int
+        t = _tensor(1)
+        self.failUnless(isinstance(t, Tensor))
+        self.failUnless(t.dtype == 'int64' or t.dtype == 'int32')
+    def test1(self): # allocate from a vector of ints, not broadcastable
+        t = _tensor(numpy.ones(5,dtype='int32'))
+        self.failUnless(isinstance(t, Tensor))
+        self.failUnless(t.dtype == 'int32')
+        self.failUnless(t.broadcastable == (0,))
+        self.failUnless(isinstance(t.data, numpy.ndarray))
+        self.failUnless(str(t.data.dtype) == 'int32')
+    def test2(self): # allocate from a column matrix of complex with name
+        t = _tensor(numpy.ones((5,1),dtype='complex64'),name='bart')
+        self.failUnless(isinstance(t, Tensor))
+        self.failUnless(t.dtype == 'complex64')
+        self.failUnless(t.broadcastable == (0,1))
+        self.failUnless(isinstance(t.data, numpy.ndarray))
+        self.failUnless(t.name == 'bart')
+    def test2b(self): # allocate from a column matrix, not broadcastable
+        t = _tensor(numpy.ones((5,1),dtype='complex64'),broadcastable=0)
+        self.failUnless(isinstance(t, Tensor))
+        self.failUnless(t.dtype == 'complex64')
+        self.failUnless(t.broadcastable == (0,0))
+        self.failUnless(isinstance(t.data, numpy.ndarray))
+        f = Function([t], [t], linker_cls=gof.CLinker)
+        self.failUnless(numpy.all(t.data == f(t.data)))
+    def test_data_normal(self): #test that assigning to .data works when it should
+        t = _tensor(numpy.ones((5,1),dtype='complex64'), broadcastable=0)
+        o27 = numpy.ones((2,7), dtype='complex64')
+        t.data = o27
+        lst = t._data
+        self.failUnless(t.data.shape == (2,7))
+        self.failUnless(t.data is o27)
+        self.failUnless(t._data is lst)
+    def test_data_badrank0(self):
+        t = _tensor(numpy.ones((5,1),dtype='complex64'), broadcastable=0)
+        try:
+            t.data = numpy.ones((2,7,1))
+            self.fail()
+        except ValueError, e:
+            self.failUnless(e[0] is Tensor.filter.E_rank)
+        try:
+            t.data = numpy.ones(1)
+            self.fail()
+        except ValueError, e:
+            self.failUnless(e[0] is Tensor.filter.E_rank)
+    def test_data_badrank1(self):
+        t = _tensor(numpy.ones((1,1),dtype='complex64'), broadcastable=1)
+        try:
+            t.data = numpy.ones((1,1,1))
+            self.fail()
+        except ValueError, e:
+            self.failUnless(e[0] is Tensor.filter.E_rank)
+        try:
+            t.data = numpy.ones(1)
+            self.fail()
+        except ValueError, e:
+            self.failUnless(e[0] is Tensor.filter.E_rank)
+    def test_data_badshape0(self):
+        t = _tensor(numpy.ones((1,1),dtype='complex64'), broadcastable=1)
+        try:
+            t.data = numpy.ones((1,2))
+            self.fail()
+        except ValueError, e:
+            self.failUnless(e[0] is Tensor.filter.E_shape)
+        try:
+            t.data = numpy.ones((0,1))
+            self.fail()
+        except ValueError, e:
+            self.failUnless(e[0] is Tensor.filter.E_shape)
+    def test_cast0(self):
+        t = Tensor('float32', [0])
+        t.data = numpy.random.rand(4) > 0.5
+        self.failUnless(str(t.data.dtype) == t.dtype)
+class T_stdlib(unittest.TestCase):
+    def test0(self):
+        t = _tensor(1.0)
+        tt = t.clone(False)
+        self.failUnless(t.dtype == tt.dtype)
+        self.failUnless(t.broadcastable is tt.broadcastable)
+        self.failUnless(tt.data is None)
+        self.failUnless(t.data == 1.0)
+    def test0b(self):
+        t = _tensor(1.0)
+        tt = t.clone()
+        self.failUnless(t.dtype == tt.dtype)
+        self.failUnless(t.broadcastable is tt.broadcastable)
+        self.failUnless(tt.data is None)
+        self.failUnless(t.data == 1.0)
+    def test1(self):
+        t = _tensor(1.0)
+        tt = t.clone(True)
+        self.failUnless(t.dtype == tt.dtype)
+        self.failUnless(t.broadcastable is tt.broadcastable)
+        self.failUnless(tt.data == 1.0)
+        self.failUnless(t.data == 1.0)
+        self.failUnless(t.data is not tt.data)
+    def test1b(self):
+        t = _tensor(1.0)
+        tt = copy(t)
+        self.failUnless(t.dtype == tt.dtype)
+        self.failUnless(t.broadcastable is tt.broadcastable)
+        self.failUnless(tt.data == 1.0)
+        self.failUnless(t.data == 1.0)
+        self.failUnless(t.data is not tt.data)
 if __name__ == '__main__':
    unittest.main()
--- a/_test_opt.py
+++ b/_test_opt.py
@@ -2,7 +2,7 @@
 import unittest
 import gof
-from opt import *
+from tensor_opt import *
 import tensor
 from tensor import Tensor
 from gof import Env

--- a/base_tensor.py
+++ b/base_tensor.py
-"""
-A simple class to store L{numpy.ndarray} data
-"""
-from gof import Result, Op, utils, AbstractFunctionError
-import numpy
-from copy import copy
-###########################
-# BaseTensor Class
-###########################
-class BaseTensor(Result):
-    """
-    L{Result} to store L{numpy.ndarray} or equivalent via .data
-    This class does not implement python operators and has no dependencies
-    on the L{Op}s that use it.
-    @todo: At some point we should document a glossary, such as terms like
-    broadcasting and shape.
-    @type _dtype: numpy dtype string such as 'int64' or 'float64' (among others)
-    @type _broadcastable: tuple or list or array of boolean values, whose length
-      is the number of dimensions of the contained L{ndarray}.
-    @ivar _broadcastable: Each element of the broadcastable vector tells us
-      something about the corresponding dimension:
-        - False means the dimension can be anything.
-        - True means  the dimension must be 1. Also, this dimension will be considered
-          for L{broadcasting}, as described and implemented in Numpy.
-    """
-    def __init__(self, dtype, broadcastable, name=None):
-        """Initialize a L{BaseTensor}
-        @note: This does not actually allocate any data.
-        """
-        # data is not given here. This may seem a bit strange, but when data was
-        # an argument, it made sense to use *either* the given dtype,
-        # broadcastable, or override them from the fields of data. This makes
-        # the function ugly, especially because it isn't obvious how to set
-        # broadcastable from data.  
-        #
-        # The only clean option I could think of, when passing a data arg was to 
-        # require the broadcastable field to be given.  Since broadcastable is
-        # the argument that is awkward to construct, I decided to put all this
-        # into the tensor(data,...) function below, which is like a second
-        # constructor that works with an ndarray.
-        Result.__init__(self, role=None, name=name)
-        self._dtype = str(dtype)
-        self.dtype_specs() # this is just for error checking
-        self._broadcastable = tuple(broadcastable)
-    ######################
-    # Result interface
-    ######################
-    # 
-    # filter
-    #
-    def filter(self, arr):
-        """Cast to an L{numpy.ndarray} and ensure arr has correct rank and shape."""
-        if not (isinstance(arr, numpy.ndarray) \
-                and arr.dtype==self.dtype):
-            arr = numpy.asarray(arr, dtype = self.dtype)
-        if len(self.broadcastable) != len(arr.shape):
-            raise ValueError(BaseTensor.filter.E_rank,
-                    self.broadcastable,
-                    arr.shape,
-                    self.owner)
-        for b, s in zip(self.broadcastable, arr.shape):
-            if b and (s != 1):
-                raise ValueError(BaseTensor.filter.E_shape)
-        return arr
-    # these strings are here so that tests can use them
-    filter.E_rank = 'wrong rank'
-    filter.E_shape = 'non-unit size on broadcastable dimension'
-    #
-    # type information
-    #
-    def dtype_specs(self):
-        """Return python - C type correspondance tuple for self.data
-        Return a tuple (python type, c type, numpy typenum) that corresponds to
-        L{self.dtype}.  It is for use in C code generation.
-        """
-        #TODO: add more type correspondances for e.g. int32, int64, float32,
-        #complex64, etc.
-        try:
-            return {'float32': (float, 'npy_float32', 'NPY_FLOAT32'),
-                    'float64': (float, 'npy_float64', 'NPY_FLOAT64'),
-                    'int8': (int, 'npy_int8', 'NPY_INT8'),
-                    'int16': (int, 'npy_int16', 'NPY_INT16'),
-                    'int32': (int, 'npy_int32', 'NPY_INT32'),
-                    'int64': (int, 'npy_int64', 'NPY_INT64'),
-                    'complex128': (complex, 'theano_complex128', 'NPY_COMPLEX128'),
-                    'complex64': (complex, 'theano_complex64', 'NPY_COMPLEX64')}[self.dtype]
-        except KeyError:
-            raise TypeError("Unsupported dtype for %s: %s" % (self.__class__.__name__, self.dtype))
-    #
-    # Description for constant folding
-    #
-    def desc(self):
-        """
-        Returns a hashable description of this L{BaseTensor}.
-        """
-        if self.data is not None:
-            return (BaseTensor, self.dtype, self.broadcastable, self.data.data[:])
-        else:
-            return (BaseTensor, self.dtype, self.broadcastable, None)
-    #
-    # C codegen stubs
-    #
-    def c_declare(self, name, sub):
-        return """
-        PyArrayObject* %(name)s;
-        int type_num_%(name)s;
-        typedef %(dtype)s dtype_%(name)s;
-        """ % dict(sub, name = name, dtype = self.dtype_specs()[1])
-    def c_init(self, name, sub):
-        return """
-        %(name)s = NULL;
-        type_num_%(name)s = %(type_num)s;
-        """ % dict(sub, name = name, type_num = self.dtype_specs()[2])
-    def c_extract(self, name, sub):
-        return """
-        %(name)s = NULL;
-        type_num_%(name)s = %(type_num)s;
-        if (py_%(name)s == Py_None) {
-            // We can either fail here or set %(name)s to NULL and rely on Ops using
-            // tensors to handle the NULL case, but if they fail to do so they'll end up
-            // with nasty segfaults, so this is public service.
-            PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None");
-            %(fail)s
-            //%(name)s = NULL;
-        }
-        else if (!PyArray_Check(py_%(name)s)) {
-            PyErr_SetString(PyExc_ValueError, "expected an ndarray");
-            %(fail)s
-        }
-        else if (((PyArrayObject*)py_%(name)s)->descr->type_num != %(type_num)s) {
-            PyErr_SetString(PyExc_ValueError, "expected %(type_num)s");
-            %(fail)s
-        }
-        else {
-            %(name)s = (PyArrayObject*)(py_%(name)s);
-            Py_XINCREF(%(name)s);
-        }
-        """ % dict(sub, name = name, type_num = self.dtype_specs()[2])
-    def c_cleanup(self, name, sub):
-        return """
-        if (%(name)s) {
-            Py_XDECREF(%(name)s);
-        }
-        """ % locals()
-    def c_sync(self, name, sub):
-        return """
-        if (!%(name)s) {
-            Py_XDECREF(py_%(name)s);
-            py_%(name)s = Py_None;
-        }
-        else if ((void*)py_%(name)s != (void*)%(name)s) {
-            Py_XDECREF(py_%(name)s);
-            py_%(name)s = (PyObject*)%(name)s;
-            Py_XINCREF(py_%(name)s);
-        }
-        """ % locals()
-    def c_headers(self):
-        return []
-    def c_libraries(self):
-        return []
-    def c_support_code(cls):
-        template = """
-        struct theano_complex%(nbits)s : public npy_complex%(nbits)s
-        {
-            typedef theano_complex%(nbits)s complex_type;
-            typedef npy_float%(half_nbits)s scalar_type;
-            complex_type operator +(complex_type y) {
-                complex_type ret;
-                ret.real = this->real + y.real;
-                ret.imag = this->imag + y.imag;
-                return ret;
-            }
-            complex_type operator -(complex_type y) {
-                complex_type ret;
-                ret.real = this->real - y.real;
-                ret.imag = this->imag - y.imag;
-                return ret;
-            }
-            complex_type operator *(complex_type y) {
-                complex_type ret;
-                ret.real = this->real * y.real - this->imag * y.imag;
-                ret.imag = this->real * y.imag + this->imag * y.real;
-                return ret;
-            }
-            complex_type operator /(complex_type y) {
-                complex_type ret;
-                scalar_type y_norm_square = y.real * y.real + y.imag * y.imag;
-                ret.real = (this->real * y.real + this->imag * y.imag) / y_norm_square;
-                ret.imag = (this->imag * y.real - this->real * y.imag) / y_norm_square;
-                return ret;
-            }
-        };
-        """
-        return template % dict(nbits = 64, half_nbits = 32) + template % dict(nbits = 128, half_nbits = 64)
-        # todo: use C templating
-    ############################
-    # Tensor specific attributes
-    ############################
-    dtype = property(lambda self: self._dtype, doc = "read-only access to _dtype, which should not be changed")
-    broadcastable = property(lambda self: self._broadcastable, doc = "read-only access to _broadcastable, which should not be changed")
-    ############################
-    # Cloning facilities
-    ############################
-    def __copy__(self):
-        return self.clone(True)
-    def clone(self, transfer_data = False):
-        """Return a copy of this instance (with its own attributes)
-        If transfer_data is True, a copy of self.data is assigned to the copy's
-        data property, otherwise the copy's data is left as None.
-        """
-        cpy = self.__class__(self.dtype, self.broadcastable, self.name)
-        if transfer_data:
-            cpy.data = copy(self.data)
-        return cpy
-class BaseTensorOp(Op):
-    """
-    A basic L{Op} subclass that can be used to make L{Op}s that operate on L{Tensor}s.
-    It is not mandatory to inherit from this class, but it is practical.
-    @ivar nin: number of inputs
-    @ivar nout: number of outputs
-    @ivar out_tensor_class: L{BaseTensor} subclass used to instantiate the outputs
-     - input_wrapper: returns a L{Tensor} from its argument
-     - propagate_dtype: returns a list of dtypes corresponding to the
-     output dtypes from a list of input dtypes (if an input is not a
-     L{Tensor}, the passed value will be None)
-     - propagate_broadcastable: returns a list of tuples corresponding
-     to the output broadcastable flags from the input broadcastable flags
-     (if an input is not a L{Tensor}, the passed value will be None).
-    """
-    nin = -1 # nin == -1 means: arbitrary number of inputs
-    nout = 1
-    out_tensor_class = BaseTensor
-    @classmethod
-    def input_wrapper(cls, obj):
-        """
-        Returns a L{Result} from an arbitrary-typed input, if possible.
-        """
-        if isinstance(obj, BaseResult):
-            return obj
-        else:
-            raise TypeError("Expected a Result instance.")
-    def __init__(self, *inputs):
-        inputs = map(self.input_wrapper, inputs)
-        if self.nin >= 0:
-            if len(inputs) != self.nin:
-                raise TypeError("Wrong number of inputs for %s (got %i, expected %i)") \
-                    % (self, len(inputs), self.nin)
-        i_broadcastables = [getattr(input, 'broadcastable', None) for input in inputs]
-        i_dtypes = [getattr(input, 'dtype', None) for input in inputs]
-        o_broadcastables = utils.from_return_values(self.propagate_broadcastable(*i_broadcastables))
-        o_dtypes = utils.from_return_values(self.propagate_dtype(*i_dtypes))
-        self.inputs = inputs
-        self.outputs = [self.out_tensor_class(dtype, broadcastable) for broadcastable, dtype in zip(o_broadcastables, o_dtypes)]
-    def propagate_broadcastable(self, *inputs):
-        raise AbstractFunctionError()
-    def propagate_dtype(self, *i_dtypes):
-        rval = set([dtype for dtype in i_dtypes if dtype is not None])
-        if len(rval) == 0:
-            raise ValueError("Cannot infer the dtypes of the outputs with no Tensor inputs.")
-        elif len(rval) > 1:
-            raise ValueError("The dtypes of all inputs should be identical.")
-        return [rval.pop()] * self.nout
--- a/cutils.py
+++ b/cutils.py
-try:
-    from cutils_ext import *
-except ImportError:
-    from scipy import weave
-    single_runner = """
-        if (!PyCObject_Check(py_cthunk)) {
-            PyErr_SetString(PyExc_ValueError,
-                            "Argument to run_cthunk must be a PyCObject returned by the c_thunk method of an omega_op.");
-            return NULL;
-        }
-        void * ptr_addr = PyCObject_AsVoidPtr(py_cthunk);
-        int (*fn)(void*) = reinterpret_cast<int (*)(void*)>(ptr_addr);
-        //int (*fn)(void*) = static_cast<int (*)(void*)>(PyCObject_AsVoidPtr(py_cthunk));
-        //int (*fn)(void*) = NULL;
-        //fn += PyCObject_AsVoidPtr(py_cthunk);
-        //int (*fn)(void*) = 
-        void* it = PyCObject_GetDesc(py_cthunk);
-        int failure = fn(it);
-        if (failure) {
-            return NULL;
-        }
-        """
-    cthunk = object()
-    mod = weave.ext_tools.ext_module('cutils_ext')
-    fun =weave.ext_tools.ext_function('run_cthunk', single_runner, ['cthunk'])
-    fun.customize.add_extra_compile_arg('--permissive')
-    mod.add_function(fun)
-    mod.compile()
-    from cutils_ext import *
--- a/elemwise.py
+++ b/elemwise.py
@@ -3,7 +3,6 @@ import elemwise_cgen as cgen
 import numpy
 from gof import Op, Viewer, Destroyer
-#from base_tensor import BaseTensor as Tensor
 import scalar
 from scalar import upcast, Scalar
 import gof

--- a/gof/op.py
+++ b/gof/op.py
@@ -17,7 +17,9 @@ __all__ = ['Op',
 def constructor(op_cls, name = None):
-    """Make an Op look like a L{Result}-valued function."""
+    """
+    Make an L{Op} look like a L{Result}-valued function.
+    """
    def f(*args, **kwargs):
        op = op_cls(*args, **kwargs)
        if len(op.outputs) > 1:

--- a/gradient.py
+++ b/gradient.py
@@ -27,27 +27,14 @@ def grad_sources_inputs(sources, graph_inputs):
    calling L{Op.grad}(...) when it is provided by an L{Op}, and at least one of the
    outputs of the L{Op} has an associated gradient.
-    The L{Op.grad}(...) functions may be called in several ways (for the
+    The L{Op.grad}(...) functions are called as such:
-    convenience of the L{Op} implementer) depending on the number of inputs and
-    outputs.  
-    If there is one input and one output::
        op.grad( op.inputs[0], grad(op.outputs[0]))
-    If there are several inputs and one output::
-        op.grad( op.inputs, grad(op.outputs[0]))
-    If there is one input and several outputs::
-        op.grad( op.inputs[0], [grad(o) for o in op.outputs[0]])
-    If there are multiple inputs and outputs::
-        op.grad( op.inputs, [grad(o) for o in op.outputs[0]])
    This function expects the L{Op.grad}(...) function to return the gradient
-    expression [results] associated with the inputs of the L{Op}.  If the L{Op} has a
+    expression [results] associated with the inputs of the L{Op}. The L{Op} should
-    single input, it should return a single result; if the L{Op} has multiple
+    return a list of results corresponding to the gradients in the same order
-    inputs, it should return a list of results corresponding to the gradients in
+    as the inputs. If it has a single input it should return a list or tuple
-    the same order as the inputs.
+    of length 1.
    For each input with respect to which an L{Op} is not differentiable, it should return
    None instead of a result instance.
@@ -79,9 +66,6 @@ def grad_sources_inputs(sources, graph_inputs):
        #if all output gradients are None, continue
        if all(map(lambda x:x is None, g_outputs)): continue
-#         output_arg = _unpack_result(g_outputs)
-#         input_arg = _unpack_result(op.inputs)
        output_arg = g_outputs
        input_arg = op.inputs
@@ -90,8 +74,6 @@ def grad_sources_inputs(sources, graph_inputs):
        except AttributeError:
            dinputs = []
-#        input_arg = [input in dinputs and input.copy() or input for input in input_arg]
        new_input_arg = []
        for input in input_arg:
            if input in dinputs:

--- a/sparse.py
+++ b/sparse.py
@@ -11,7 +11,7 @@ import numpy
 from scipy import sparse
 import gof.op, gof.result
-import tensor, base_tensor
+import tensor
@@ -20,19 +20,19 @@ import tensor, base_tensor
 def _is_sparse_result(x):
    """
    @rtype: boolean
-    @return: True iff x is a L{SparseResult} (and not a L{base_tensor.BaseTensor})
+    @return: True iff x is a L{SparseResult} (and not a L{tensor.Tensor})
    """
-    if not isinstance(x, SparseResult) and not isinstance(x, base_tensor.BaseTensor):
+    if not isinstance(x, SparseResult) and not isinstance(x, tensor.Tensor):
-        raise NotImplementedError("_is_sparse should only be called on sparse.SparseResult or base_tensor.BaseTensor, not,", x)
+        raise NotImplementedError("_is_sparse should only be called on sparse.SparseResult or tensor.Tensor, not,", x)
    return isinstance(x, SparseResult)
 def _is_dense_result(x):
    """
    @rtype: boolean
-    @return: True unless x is a L{SparseResult} (and not a L{base_tensor.BaseTensor})
+    @return: True iff x is a L{tensor.Tensor}
    """
-    if not isinstance(x, SparseResult) and not isinstance(x, base_tensor.BaseTensor):
+    if not isinstance(x, SparseResult) and not isinstance(x, tensor.Tensor):
-        raise NotImplementedError("_is_sparse should only be called on sparse.SparseResult or base_tensor.BaseTensor, not,", x)
+        raise NotImplementedError("_is_sparse should only be called on sparse.SparseResult or tensor.Tensor, not,", x)
-    return isinstance(x, base_tensor.BaseTensor)
+    return isinstance(x, tensor.Tensor)
 def _is_sparse(x):
    """

--- a/tensor.py
+++ b/tensor.py
@@ -4,11 +4,12 @@ import inspect
 import numpy
+from copy import copy
 from gof import Result, Op, utils, Destroyer, Viewer, AbstractFunctionError
 import gof.result
 import gof.op
-from base_tensor import BaseTensor, BaseTensorOp
 import blas # for gemm, dot
 import elemwise as s2t
@@ -17,15 +18,239 @@ import scalar as scal
 from functools import partial
-class Tensor(BaseTensor):
+class Tensor(Result):
+    """
+    L{Result} to store L{numpy.ndarray} or equivalent via .data
+    This class does not implement python operators and has no dependencies
+    on the L{Op}s that use it.
+    @todo: At some point we should document a glossary, such as terms like
+    broadcasting and shape.
+    @type _dtype: numpy dtype string such as 'int64' or 'float64' (among others)
+    @type _broadcastable: tuple or list or array of boolean values, whose length
+      is the number of dimensions of the contained L{ndarray}.
+    @ivar _broadcastable: Each element of the broadcastable vector tells us
+      something about the corresponding dimension:
+        - False means the dimension can be anything.
+        - True means  the dimension must be 1. Also, this dimension will be considered
+          for L{broadcasting}, as described and implemented in Numpy.
    """
-    This subclass of L{BaseTensor} provides operator overloading using
-    implementations of L{Tensor} operations contained in this file.
-    Operators:
+    def __init__(self, dtype, broadcastable, name=None):
-     - most numeric operators are overloaded (to return L{Op}s that
+        """Initialize a L{Tensor}
-     perform the corresponding calculation)
+        @note: This does not actually allocate any data.
+        @param dtype: element type; stored as str(dtype) and checked with dtype_specs()
+        @param broadcastable: iterable of per-dimension flags; stored as a tuple
+        @param name: optional name, forwarded to L{Result.__init__}
+        @raise TypeError: if str(dtype) is not a dtype known to dtype_specs()
+        """
+        # data is not given here. This may seem a bit strange, but when data was
+        # an argument, it made sense to use *either* the given dtype,
+        # broadcastable, or override them from the fields of data. This makes
+        # the function ugly, especially because it isn't obvious how to set
+        # broadcastable from data.
+        #
+        # The only clean option I could think of, when passing a data arg was to
+        # require the broadcastable field to be given.  Since broadcastable is
+        # the argument that is awkward to construct, I decided to put all this
+        # into the tensor(data,...) function below, which is like a second
+        # constructor that works with an ndarray.
+        Result.__init__(self, role=None, name=name)
+        self._dtype = str(dtype)
+        self.dtype_specs() # this is just for error checking
+        self._broadcastable = tuple(broadcastable)
+    ######################
+    # Result interface
+    ######################
+    # 
+    # filter
+    #
+    def filter(self, arr):
+        """Cast to an L{numpy.ndarray} and ensure arr has correct rank and shape.
+        @param arr: value to check; converted with numpy.asarray(arr, dtype=self.dtype)
+          unless it is already an ndarray of that dtype
+        @return: the (possibly converted) ndarray
+        @raise ValueError: with filter.E_rank if the number of dimensions differs
+          from len(self.broadcastable), or with filter.E_shape if a dimension
+          flagged broadcastable does not have size 1
+        """
+        # only convert when needed, so an ndarray of the right dtype is passed through as-is
+        if not (isinstance(arr, numpy.ndarray) \
+                and arr.dtype==self.dtype):
+            arr = numpy.asarray(arr, dtype = self.dtype)
+        if len(self.broadcastable) != len(arr.shape):
+            raise ValueError(Tensor.filter.E_rank,
+                    self.broadcastable,
+                    arr.shape,
+                    self.owner)
+        # every dimension flagged broadcastable must have extent exactly 1
+        for b, s in zip(self.broadcastable, arr.shape):
+            if b and (s != 1):
+                raise ValueError(Tensor.filter.E_shape)
+        return arr
+    # these strings are here so that tests can use them
+    filter.E_rank = 'wrong rank'
+    filter.E_shape = 'non-unit size on broadcastable dimension'
+    #
+    # type information
+    #
+    def dtype_specs(self):
+        """Return python - C type correspondence tuple for self.data
+        Return a tuple (python type, c type, numpy typenum) that corresponds to
+        L{self.dtype}.  It is for use in C code generation.
+        @raise TypeError: if self.dtype is not one of the keys of the table below
+        """
+        #TODO: add more type correspondences (the dtypes this note used to name --
+        #int32, int64, float32, complex64 -- are already present in the table below).
+        try:
+            return {'float32': (float, 'npy_float32', 'NPY_FLOAT32'),
+                    'float64': (float, 'npy_float64', 'NPY_FLOAT64'),
+                    'int8': (int, 'npy_int8', 'NPY_INT8'),
+                    'int16': (int, 'npy_int16', 'NPY_INT16'),
+                    'int32': (int, 'npy_int32', 'NPY_INT32'),
+                    'int64': (int, 'npy_int64', 'NPY_INT64'),
+                    'complex128': (complex, 'theano_complex128', 'NPY_COMPLEX128'),
+                    'complex64': (complex, 'theano_complex64', 'NPY_COMPLEX64')}[self.dtype]
+        except KeyError:
+            raise TypeError("Unsupported dtype for %s: %s" % (self.__class__.__name__, self.dtype))
+    #
+    # Description for constant folding
+    #
+    def desc(self):
+        """
+        Returns a hashable description of this L{Tensor}.
+        @return: the tuple (Tensor, dtype, broadcastable, contents), where contents
+          is self.data.data[:] when data is set and None otherwise
        """
+        if self.data is not None:
+            # NOTE(review): presumably self.data is an ndarray and .data[:] yields a
+            # hashable snapshot of its buffer -- confirm against the Result API.
+            return (Tensor, self.dtype, self.broadcastable, self.data.data[:])
+        else:
+            return (Tensor, self.dtype, self.broadcastable, None)
+    #
+    # C codegen stubs
+    #
+    def c_declare(self, name, sub):
+        """Return C code declaring the variables used for this tensor.
+        The code declares a PyArrayObject* called <name>, an int type_num_<name>
+        and a typedef dtype_<name> for the C type given by dtype_specs()[1].
+        @param name: identifier substituted for %(name)s in the template
+        @param sub: dict of extra substitutions merged into the template values
+        """
+        return """
+        PyArrayObject* %(name)s;
+        int type_num_%(name)s;
+        typedef %(dtype)s dtype_%(name)s;
+        """ % dict(sub, name = name, dtype = self.dtype_specs()[1])
+    def c_init(self, name, sub):
+        """Return C code initializing the variables declared by c_declare.
+        The code sets <name> to NULL and type_num_<name> to the numpy type number
+        given by dtype_specs()[2].
+        @param name: identifier substituted for %(name)s in the template
+        @param sub: dict of extra substitutions merged into the template values
+        """
+        return """
+        %(name)s = NULL;
+        type_num_%(name)s = %(type_num)s;
+        """ % dict(sub, name = name, type_num = self.dtype_specs()[2])
+    def c_extract(self, name, sub):
+        """Return C code that extracts the ndarray held by py_<name> into <name>.
+        The generated code sets a ValueError and runs the %(fail)s snippet when
+        py_<name> is None, is not an ndarray, or has a type number other than
+        dtype_specs()[2]; otherwise it casts py_<name> to PyArrayObject* and
+        increments its reference count.
+        @param name: identifier substituted for %(name)s in the template
+        @param sub: dict of extra substitutions; the template uses %(fail)s, which
+          is not set here and so has to be provided through sub
+        """
+        return """
+        %(name)s = NULL;
+        type_num_%(name)s = %(type_num)s;
+        if (py_%(name)s == Py_None) {
+            // We can either fail here or set %(name)s to NULL and rely on Ops using
+            // tensors to handle the NULL case, but if they fail to do so they'll end up
+            // with nasty segfaults, so this is public service.
+            PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None");
+            %(fail)s
+            //%(name)s = NULL;
+        }
+        else if (!PyArray_Check(py_%(name)s)) {
+            PyErr_SetString(PyExc_ValueError, "expected an ndarray");
+            %(fail)s
+        }
+        else if (((PyArrayObject*)py_%(name)s)->descr->type_num != %(type_num)s) {
+            PyErr_SetString(PyExc_ValueError, "expected %(type_num)s");
+            %(fail)s
+        }
+        else {
+            %(name)s = (PyArrayObject*)(py_%(name)s);
+            Py_XINCREF(%(name)s);
+        }
+        """ % dict(sub, name = name, type_num = self.dtype_specs()[2])
+    def c_cleanup(self, name, sub):
+        """Return C code releasing the reference held in <name>, if it is non-NULL.
+        @param name: identifier substituted for %(name)s in the template
+        @param sub: not referenced by the template (values come from locals())
+        """
+        return """
+        if (%(name)s) {
+            Py_XDECREF(%(name)s);
+        }
+        """ % locals()
+    def c_sync(self, name, sub):
+        """Return C code that writes the C-side array <name> back into py_<name>.
+        If <name> is NULL, py_<name> is replaced by None; otherwise, when the two
+        pointers differ, py_<name> is replaced by <name>.  In both cases the
+        reference to the old py_<name> is dropped and a new reference is taken
+        on the stored value, so py_<name> always holds its own reference.
+        @param name: identifier substituted for %(name)s in the template
+        @param sub: not referenced by the template (values come from locals())
+        """
+        return """
+        if (!%(name)s) {
+            Py_XDECREF(py_%(name)s);
+            py_%(name)s = Py_None;
+            Py_XINCREF(py_%(name)s);
+        }
+        else if ((void*)py_%(name)s != (void*)%(name)s) {
+            Py_XDECREF(py_%(name)s);
+            py_%(name)s = (PyObject*)%(name)s;
+            Py_XINCREF(py_%(name)s);
+        }
+        """ % locals()
+    def c_headers(self):
+        """Return an empty list (presumably: no extra C headers are required -- confirm with the code generator)."""
+        return []
+    def c_libraries(self):
+        """Return an empty list (presumably: no extra libraries need to be linked -- confirm with the code generator)."""
+        return []
+    def c_support_code(cls):
+        """Return C++ source defining the theano_complex64 and theano_complex128 structs.
+        Each struct derives from the matching npy_complex type and overloads the
+        +, -, * and / operators.  The same template is expanded twice, once
+        per bit width.
+        NOTE(review): the parameter is named cls but no classmethod decorator is
+        visible here, so it receives whatever the method is bound to; it is not
+        used in the body.
+        """
+        template = """
+        struct theano_complex%(nbits)s : public npy_complex%(nbits)s
+        {
+            typedef theano_complex%(nbits)s complex_type;
+            typedef npy_float%(half_nbits)s scalar_type;
+            complex_type operator +(complex_type y) {
+                complex_type ret;
+                ret.real = this->real + y.real;
+                ret.imag = this->imag + y.imag;
+                return ret;
+            }
+            complex_type operator -(complex_type y) {
+                complex_type ret;
+                ret.real = this->real - y.real;
+                ret.imag = this->imag - y.imag;
+                return ret;
+            }
+            complex_type operator *(complex_type y) {
+                complex_type ret;
+                ret.real = this->real * y.real - this->imag * y.imag;
+                ret.imag = this->real * y.imag + this->imag * y.real;
+                return ret;
+            }
+            complex_type operator /(complex_type y) {
+                complex_type ret;
+                scalar_type y_norm_square = y.real * y.real + y.imag * y.imag;
+                ret.real = (this->real * y.real + this->imag * y.imag) / y_norm_square;
+                ret.imag = (this->imag * y.real - this->real * y.imag) / y_norm_square;
+                return ret;
+            }
+        };
+        """
+        # half_nbits is the width of each real/imaginary component (scalar_type)
+        return template % dict(nbits = 64, half_nbits = 32) + template % dict(nbits = 128, half_nbits = 64)
+        # todo: use C templating
+    ############################
+    # Tensor specific attributes
+    ############################
+    dtype = property(lambda self: self._dtype, doc = "read-only access to _dtype, which should not be changed")
+    broadcastable = property(lambda self: self._broadcastable, doc = "read-only access to _broadcastable, which should not be changed")
+    ndim = property(lambda self: len(self.broadcastable), doc = "read-only access to the number of dimensions")
+    ############################
+    # Cloning facilities
+    ############################
+    def __copy__(self):
+        """Return self.clone(True), i.e. a clone that also receives a copy of the data."""
+        return self.clone(True)
+    def clone(self, transfer_data = False):
+        """Return a copy of this instance (with its own attributes)
+        If transfer_data is True, a copy of self.data is assigned to the copy's
+        data property, otherwise the copy's data is left as None.
+        @param transfer_data: whether to assign copy(self.data) to the new instance
+        @return: a new instance of self.__class__ built from this instance's
+          dtype, broadcastable and name
+        """
+        # built through self.__class__ so that subclasses clone to their own type
+        cpy = self.__class__(self.dtype, self.broadcastable, self.name)
+        if transfer_data:
+            cpy.data = copy(self.data)
+        return cpy
    #UNARY
    def __abs__(self): return Abs(self).out
@@ -79,7 +304,7 @@ s2t.Tensor = Tensor
 # alternate Tensor constructor
 def astensor(data, broadcastable=None, name=None):
    """Return a L{Tensor} containing given data"""
-    if isinstance(data, BaseTensor):
+    if isinstance(data, Tensor):
        if broadcastable is not None and list(data.broadcastable) != list(broadcastable):
            raise TypeError("The data to wrap as a Tensor has the wrong broadcastable pattern. Expected %s, got %s." % (broadcastable, data.broadcastable))
        if name is not None and name != data.name:
@@ -153,36 +378,57 @@ cols, icols, fcols = _multi(col, icol, fcol)
 # to upcast their arguments... this internal-use function is a good place to put debugging stuff, better than the global astensor.
 _as_tensor = astensor
-class _Op(BaseTensorOp):
-    """A convenient base for the ops in this file"""
-    out_tensor_class = Tensor
-    @classmethod
+class _Op(Op):
-    def input_wrapper(cls, obj):
+    """
-        return _as_tensor(obj)
+    A basic L{Op} subclass that can be used to make L{Op}s that operate on L{Tensor}s.
+    It is not mandatory to inherit from this class, but it is practical.
+    @ivar nin: number of inputs
+    @ivar nout: number of outputs
+    @ivar out_tensor_class: L{Tensor} subclass used to instantiate the outputs
+     - input_wrapper: returns a L{Tensor} from its argument
+     - propagate_dtype: returns a list of dtypes corresponding to the
+     output dtypes from a list of input dtypes (if an input is not a
+     L{Tensor}, the passed value will be None)
+     - propagate_broadcastable: returns a list of tuples corresponding
+     to the output broadcastable flags from the input broadcastable flags
+     (if an input is not a L{Tensor}, the passed value will be None).
+    """
-    def c_var_names(self):
+    nin = -1 # nin == -1 means: arbitrary number of inputs
-        (self, inames, onames), _1, _2, _3 = inspect.getargspec(self.c_impl)
+    nout = 1
-        inames = utils.from_return_values(inames)
-        onames = utils.from_return_values(onames)
-        return [inames, onames]
-    def c_code(self, input_names, output_names, sub):
+    def __init__(self, *inputs):
+        """Wrap the inputs with _as_tensor and create the output L{Tensor}s.
+        The output dtypes and broadcastable patterns are obtained from
+        propagate_dtype and propagate_broadcastable.
+        @raise TypeError: if nin >= 0 and the number of inputs differs from nin
+        """
-        sub = dict(sub)
+        inputs = map(_as_tensor, inputs)
-        icvn, ocvn = self.c_var_names()
-        for real, tosub in zip(input_names + output_names, icvn + ocvn):
-            sub[tosub] = real
-        return self.c_impl(self.inputs, self.outputs) % sub
-    def c_impl(self, inputs, outputs):
+        if self.nin >= 0:
-        raise AbstractFunctionError("No c_impl for %s" % self.__class__.__name__)
+            if len(inputs) != self.nin:
+                # format the message before building the exception; applying % to
+                # the TypeError instance would itself fail and hide this message
+                raise TypeError("Wrong number of inputs for %s (got %i, expected %i)"
+                    % (self, len(inputs), self.nin))
-class _Unary:
+        i_broadcastables = [getattr(input, 'broadcastable', None) for input in inputs]
-    nin = 1
+        i_dtypes = [getattr(input, 'dtype', None) for input in inputs]
+        o_broadcastables = utils.from_return_values(self.propagate_broadcastable(*i_broadcastables))
+        o_dtypes = utils.from_return_values(self.propagate_dtype(*i_dtypes))
+        self.inputs = inputs
+        self.outputs = [Tensor(dtype, broadcastable) for broadcastable, dtype in zip(o_broadcastables, o_dtypes)]
+    def propagate_broadcastable(self, *inputs):
+        """Return the output broadcastable patterns; must be overridden.
+        @raise AbstractFunctionError: always, in this base implementation
+        """
+        raise AbstractFunctionError()
+    def propagate_dtype(self, *i_dtypes):
+        """Return a list of nout copies of the single dtype shared by the inputs.
+        Entries that are None are ignored.
+        @raise ValueError: if no dtype is given, or if the dtypes are not all identical
+        """
+        rval = set([dtype for dtype in i_dtypes if dtype is not None])
+        if len(rval) == 0:
+            raise ValueError("Cannot infer the dtypes of the outputs with no Tensor inputs.")
+        elif len(rval) > 1:
+            raise ValueError("The dtypes of all inputs should be identical.")
+        return [rval.pop()] * self.nout
-class _Binary:
-    nin = 2
 ##########################

--- a/opt.py
+++ b/opt.py