提交 ce7533df authored 作者: James Bergstra's avatar James Bergstra

Merge

......@@ -35,11 +35,21 @@ class OpFromGraph(gof.Op):
"""
def __init__(self, inputs, outputs, grad_depth = 1, **kwargs):
if not isinstance(outputs, list):
raise TypeError('outputs must be list', outputs)
for i in inputs + outputs:
if not isinstance(i, gof.Variable):
raise TypeError('inputs and outputs must be Variable instances', i)
if 'updates' in kwargs:
raise TypeError('updates are not allowed in kwargs')
# TODO: the graph may have implicit inputs like Value and SharedVariable instances.
# what impact to they have on the validity of this Op?
self.fn = function(inputs, outputs, **kwargs)
self.inputs = inputs
self.outputs = outputs
self.input_types = [input.type for input in inputs]
self.output_types = [output.type for output in outputs]
if grad_depth > 0:
output_grads = [t() for t in self.output_types]
gd = G.grad_sources_inputs(zip(self.outputs, output_grads), self.inputs)
......@@ -52,6 +62,13 @@ class OpFromGraph(gof.Op):
self.grad_ops.append(OpFromGraph(inputs + output_grads,
[g],
grad_depth = grad_depth - 1))
def __eq__(self, other):
    # Identity-based equality: two OpFromGraph instances never compare equal
    # unless they are the same object, so graph merging never unifies them.
    #TODO: recognize a copy
    return self is other

def __hash__(self):
    # All instances share one hash value; this is consistent with the
    # identity-based __eq__ above (equal objects must hash equal, and only
    # an object and itself are ever equal here).
    #TODO: use internal variables in hash
    return hash(type(self))
def make_node(self, *inputs):
for input, type in zip(inputs, self.input_types):
......@@ -63,8 +80,11 @@ class OpFromGraph(gof.Op):
def perform(self, node, inputs, outputs):
variables = self.fn(*inputs)
assert len(variables) == len(outputs)
for output, variable in zip(outputs, variables):
output[0] = variable
##TODO: when function's output-borrowing semantics are correct, we wont need this
# copy anymore
output[0] = variable.copy()
def grad(self, inputs, output_grads):
if hasattr(self, 'grad_ops'):
......
from .sharedvalue import shared
from .pfunc import pfunc
"""Provide a simple user friendly API """
__docformat__ = 'restructuredtext en'
import traceback
import copy
import numpy
......@@ -10,6 +11,14 @@ from theano.tensor import TensorType
from theano.scalar import Scalar
from theano.compile import function
import logging
_logger = logging.getLogger('theano.compile.sandbox.sharedvalue')
# NOTE(review): forcing DEBUG on a library logger overrides any configuration
# done by the application; consider leaving the level unset.
_logger.setLevel(logging.DEBUG)

# Thin convenience wrappers around the module logger: join all positional
# arguments with spaces and emit the result at the corresponding level.
def debug(*msg): _logger.debug(' '.join(str(m) for m in msg))
def info(*msg): _logger.info(' '.join(str(m) for m in msg))
# Fixed: Logger.warn is a deprecated alias; delegate to Logger.warning.
def warn(*msg): _logger.warning(' '.join(str(m) for m in msg))
def warning(*msg): _logger.warning(' '.join(str(m) for m in msg))
def error(*msg): _logger.error(' '.join(str(m) for m in msg))
class SharedVariable(Variable):
"""
......@@ -92,6 +101,9 @@ class SharedVariable(Variable):
:param update: the new value for this shared variable when updated by a pfunc.
:returns: a Variable whose value will be assigned to this SharedVariable by a pfunc.
:note: The return value of this function must match the self.type, or else pfunc()
will raise a TypeError.
"""
if not isinstance(update, Variable):
# The value for the update is not a Variable: we cast it into
......@@ -148,14 +160,35 @@ def tensor_constructor(value, name=None, strict=False, broadcastable=None):
type = TensorType(value.dtype, broadcastable=broadcastable)
return TensorSharedVariable(type=type, value=value, name=name, strict=strict)
# TensorSharedVariable brings in the tensor operators, is not ideal, but works as long as we
# dont do purely scalar-scalar operations
class ScalarSharedVariable(SharedVariable, theano.tensor.basic._tensor_py_operators):
pass
@shared_constructor
def scalar_constructor(value, name=None, strict=False, dtype=None):
    """SharedVariable constructor for scalar values. Defaults to int64 or float64.

    :param value: a Python or numpy scalar to wrap in a SharedVariable
    :param name: optional name for the resulting Variable
    :param strict: if True, later assignments to ``.value`` must match the dtype
    :param dtype: explicit dtype string; overrides the inference below

    :note: We implement this using 0-d tensors for now.
    """
    if not isinstance(value, (numpy.number, float, int)):
        raise TypeError()
    if dtype is None:
        # use float64 and int64 by default; numpy scalars keep their own dtype
        if isinstance(value, float):
            dtype = 'float64'
        elif isinstance(value, int):
            dtype = 'int64'
        else:
            # a numpy.number: its class name is its dtype name, e.g. 'float32'.
            # (named `tensor_type` below to avoid shadowing the builtin `type`,
            # which this call needs.)
            dtype = type(value).__name__
    tensor_type = TensorType(dtype=dtype, broadcastable=[])
    try:
        # don't pass the dtype to asarray because we want this to fail if strict is True and the
        # types do not match
        rval = ScalarSharedVariable(type=tensor_type, value=numpy.asarray(value),
                name=name, strict=strict)
        return rval
    except Exception:
        # log the failure for debugging; the bare `raise` preserves the traceback
        traceback.print_exc()
        raise
......@@ -18,6 +18,7 @@ class NNet(object):
self.lr = shared(lr, 'learning_rate')
self.w1 = shared(numpy.zeros((n_hidden, n_input)), 'w1')
self.w2 = shared(numpy.zeros((n_output, n_hidden)), 'w2')
print self.lr.type
self.hidden = sigmoid(tensor.dot(self.w1, self.input))
self.output = tensor.dot(self.w2, self.hidden)
......
......@@ -172,7 +172,7 @@ class Test_pfunc(unittest.TestCase):
# Same but using a mutable constant to show how it can be used to
# modify the update value after the function is created.
x.value = 0
y = numpy.ones(())
y = numpy.ones((), dtype='int64')
assign_mutable = pfunc([], [], updates = {x: y})
assign_mutable()
self.failUnless(x.value == 1)
......
......@@ -10,9 +10,15 @@ class Test_SharedVariable(unittest.TestCase):
def test_ctors(self):
assert shared(7).type == Scalar('int64')
assert shared(7.0).type == Scalar('float64')
assert shared(7, dtype='float64').type == Scalar('float64')
if 0: #when using an implementation that handles scalars with Scalar type
assert shared(7).type == Scalar('int64')
assert shared(7.0).type == Scalar('float64')
assert shared(7, dtype='float64').type == Scalar('float64')
else:
assert shared(7).type == theano.tensor.lscalar
assert shared(7.0).type == theano.tensor.dscalar
assert shared(7, dtype='float64').type == theano.tensor.dscalar
# test tensor constructor
b = shared(numpy.zeros((5,5), dtype='int32'))
......@@ -107,13 +113,17 @@ class Test_SharedVariable(unittest.TestCase):
def test_strict(self):
    """Assigning a wrongly-typed value to a strict SharedVariable raises TypeError."""
    def f(var, val): var.value = val
    b = shared(numpy.int64(7), strict=True)
    #assert b.type == Scalar('int64')  # when scalars use the Scalar type
    assert b.type == theano.tensor.lscalar
    # a Python float must not be accepted by a strict int64 shared variable
    self.failUnlessRaises(TypeError, f, b, 8.23)
    b = shared(numpy.float64(7.234), strict=True)
    #assert b.type == Scalar('float64')  # when scalars use the Scalar type
    assert b.type == theano.tensor.dscalar
    # a Python int must not be accepted by a strict float64 shared variable
    self.failUnlessRaises(TypeError, f, b, 8)
    c = shared(numpy.zeros((5, 5), dtype='float32'))
    # a float64 matrix cannot be assigned to the scalar `b`
    # NOTE(review): `c` is unused below -- possibly this last check was meant
    # to target `c`; kept targeting `b` to preserve the committed behavior.
    self.failUnlessRaises(TypeError, f, b, numpy.random.rand(5, 5))
......@@ -20,9 +20,12 @@ class T_OpFromGraph(unittest.TestCase):
x, y, z = T.matrices('xyz')
e = x + y * z
op = OpFromGraph([x, y, z], [e], mode='FAST_RUN')
f = op(x, y, z) - op(y, z, x)
f = op(x, y, z) - op(y, z, x) #(1+3*5=array of 16) - (3+1*5=array of 8)
fn = function([x, y, z], f)
xv, yv, zv = N.ones((2, 2)), N.ones((2, 2))*3, N.ones((2, 2))*5
print function, function.__module__
print fn.maker.env.toposort()
print fn(xv, yv, zv)
assert numpy.all(8.0 == fn(xv, yv, zv))
assert numpy.all(8.0 == fn(xv, yv, zv))
......
......@@ -6,9 +6,15 @@ Defines Linkers that deal with C implementations.
from copy import copy
import re #for set_compiledir
import os, sys, platform, StringIO, time
import md5
# hashlib is new in Python 2.5; fall back to the old `md5` module before that.
if sys.version_info[:2] >= (2, 5):
    import hashlib

    def hash_from_code(msg):
        """Return the hex md5 digest of `msg` (keys the compiled-module cache)."""
        return hashlib.md5(msg).hexdigest()
else:
    import md5

    def hash_from_code(msg):
        """Return the hex md5 digest of `msg` (pre-2.5 `md5` module fallback)."""
        # Fixed: previously hashed `struct_code`, a name that is not in scope
        # in this function; hash the `msg` argument instead.
        return md5.new(msg).hexdigest()
from theano.gof.python25 import any, all
......@@ -512,6 +518,8 @@ class CLinker(link.Linker):
except utils.MethodNotDefined:
cleanup = ""
info('compiling un-versioned Apply', node)
blocks.append(CodeBlock("", behavior, cleanup, sub))
tasks.append((node, 'code', id))
id += 1
......@@ -525,11 +533,7 @@ class CLinker(link.Linker):
# The hash calculated on the code identifies it so weave can cache properly.
# (the hash has to be used outside of the support code because weave does not consider changes in the support code)
# hashlib is new to 2.5
if sys.version_info[:2] < (2,5):
hash = md5.new(struct_code).hexdigest()
else:
hash = hashlib.md5(struct_code).hexdigest()
hash = hash_from_code(struct_code)
struct_name = '__struct_compiled_op_%s' % hash
#struct_code %= dict(name = struct_name)
......@@ -811,7 +815,7 @@ class CLinker(link.Linker):
return (op_pos[i.owner], i.owner.outputs.index(i))
for opos, o in enumerate(order):
version.append(o.op.c_code_cache_version())
version.append(o.op.c_code_cache_version_apply(o))
for i in o.inputs:
version.append(i.type.c_code_cache_version())
for i in o.outputs:
......
......@@ -106,8 +106,27 @@ class CLinkerObject(object):
The cache mechanism may erase cached modules that have been superceded by newer
versions. See `ModuleCache` for details.
:note: See also `c_code_cache_version_apply()`
"""
return ()
def c_code_cache_version_apply(self, node):
    """Return a tuple of integers indicating the version of this Op.

    An empty tuple indicates an 'unversioned' Op that will not be cached
    between processes.

    The cache mechanism may erase cached modules that have been superceded
    by newer versions.  See `ModuleCache` for details.

    :note: See also `c_code_cache_version()`

    :note: This function overrides `c_code_cache_version` unless it explicitly
    calls `c_code_cache_version`. The default implementation simply calls
    `c_code_cache_version` and ignores the `node` argument.
    """
    # Fixed: a stray `return (1,)` here made the documented delegation below
    # unreachable and stamped every Op as version (1,).
    return self.c_code_cache_version()
def c_compile_args(self):
"""Optional: Return a list of compile args recommended to compile the
......
......@@ -177,6 +177,16 @@ class CLinkerType(CLinkerObject):
"""
raise MethodNotDefined("c_sync", type(self), self.__class__.__name__)
def c_code_cache_version(self):
    """Return a tuple of integers indicating the version of this Type.

    An empty tuple indicates an 'unversioned' Type that will not be cached
    between processes.

    The cache mechanism may erase cached modules that have been superceded
    by newer versions.  See `ModuleCache` for details.
    """
    # Default: unversioned, so subclasses must override to enable caching.
    return ()
......
......@@ -108,7 +108,8 @@ def grad_sources_inputs(sources, graph_inputs, warn_type=True):
if g_r and (getattr(r,'type',0) != getattr(g_r,'type', 1)):
r_type = getattr(r,'type', None)
g_r_type = getattr(g_r,'type', None)
warning('%s.grad returned a different type for input %i: %s vs. %s'%(node.op, ii, r_type, g_r_type))
warning('%s.grad returned a different type (%s) for input %i of type (%s)'%(
node.op, g_r_type, ii, r_type))
if g_r and len(sources) == 1 and sources[0][0].name and r.name:
g_r.name = "(d%s/d%s)" % (sources[0][0].name, r.name)
if g_r is not None:
......
......@@ -61,7 +61,9 @@ class Scalar(Type):
def filter(self, data, strict = False):
py_type = self.dtype_specs()[0]
if strict and not isinstance(data, py_type):
raise TypeError("%s expected a %s" % (self, self.dtype), data)
raise TypeError("%s expected a %s, got %s of type %s" % (self, py_type, data,
type(data)),
data)
try:
return py_type(data)
except Exception, e:
......@@ -180,23 +182,44 @@ class Scalar(Type):
ret.imag = (this->imag * y.real - this->real * y.imag) / y_norm_square;
return ret;
}
complex_type& operator =(const scalar_type& y) {
this->real=y;
this->imag=0;
return *this;
}
%(upcast)s
template <typename T>
complex_type& operator =(const T& y);
};
"""
operator_eq = """
template <> %(mytype)s & %(mytype)s::operator =(const npy_int8 & y)
{ this->real=y; this->imag=0; return *this; }
template <> %(mytype)s & %(mytype)s::operator =(const npy_int16 & y)
{ this->real=y; this->imag=0; return *this; }
template <> %(mytype)s & %(mytype)s::operator =(const npy_int32 & y)
{ this->real=y; this->imag=0; return *this; }
template <> %(mytype)s & %(mytype)s::operator =(const npy_int64 & y)
{ this->real=y; this->imag=0; return *this; }
template <> %(mytype)s & %(mytype)s::operator =(const npy_float32 & y)
{ this->real=y; this->imag=0; return *this; }
template <> %(mytype)s & %(mytype)s::operator =(const npy_float64 & y)
{ this->real=y; this->imag=0; return *this; }
template <> %(mytype)s & %(mytype)s::operator =(const theano_complex128 & y)
{ this->real=y.real; this->imag=y.imag; return *this; }
template <> %(mytype)s & %(mytype)s::operator =(const theano_complex64 & y)
{ this->real=y.real; this->imag=y.imag; return *this; }
"""
# todo: use C templating
return template % dict(nbits = 64, half_nbits = 32, upcast="") + template % dict(nbits = 128, half_nbits = 64, upcast="""
complex_type& operator =(theano_complex64 y) {
this->real=y.real;
this->imag=y.imag;
return *this;
}
""")
return template % dict(nbits = 64, half_nbits = 32) \
+ template % dict(nbits = 128, half_nbits = 64) \
+ operator_eq % dict(mytype='theano_complex128') \
+ operator_eq % dict(mytype='theano_complex64')
def c_code_cache_version(self):
    # Bumped to 2 when the complex operator= support code above changed;
    # bump again whenever any emitted C support code changes.
    return (2,)
int8 = Scalar('int8')
......@@ -293,6 +316,8 @@ class transfer_type(gof.utils.object2):
def __init__(self, *transfer):
assert all(type(x) == int for x in transfer)
self.transfer = transfer
def __str__(self):
    """Return a readable tag such as 'transfer_type{0, 1}'.

    The elements of ``self.transfer`` are joined explicitly: the previous
    ``'transfer_type{%s}' % self.transfer`` raised TypeError whenever the
    tuple held more than one element, because %-formatting expands a tuple
    argument into multiple format arguments.
    """
    return 'transfer_type{%s}' % ', '.join(str(i) for i in self.transfer)
def __call__(self, *types):
upcast = upcast_out(*types)
retval = []
......@@ -395,6 +420,9 @@ class ScalarOp(Op):
else:
return "%s{%s}" % (self.__class__.__name__, ", ".join("%s=%s" % (k, v) for k, v in self.__dict__.items() if k != "name"))
def c_code_cache_version(self):
    # Shared default for scalar Ops; bump when the generic c_code changes.
    return (2,)
class UnaryScalarOp(ScalarOp):
nin = 1
......@@ -617,12 +645,10 @@ class Add(ScalarOp):
retval = []
for i in inputs:
if i.type in grad_types:
retval += [gz]
retval += [cast(gz, i.type.dtype)]
else:
retval += [None]
return retval
#backport
#return [(gz if i.type in grad_types else None) for i in inputs]
add = Add(upcast_out, name = 'add')
class Mul(ScalarOp):
......@@ -658,18 +684,15 @@ class Sub(BinaryScalarOp):
return "%(z)s = %(x)s - %(y)s;" % locals()
def grad(self, (x, y), (gz, )):
if x.type in grad_types:
first_part = gz
first_part = cast(gz, x.type.dtype)
else:
first_part = None
first_part = None
if y.type in grad_types:
second_part = -gz
second_part = cast(-gz, y.type.dtype)
else:
second_part = None
second_part = None
return first_part, second_part
#return gz if x.type in grad_types else None, -gz if y.type in grad_types else None
sub = Sub(upcast_out, name = 'sub')
def div_proxy(x, y):
......@@ -699,19 +722,15 @@ class TrueDiv(BinaryScalarOp):
return "%(z)s = %(x)s / %(y)s;" % locals()
def grad(self, (x, y), (gz, )):
if x.type in grad_types:
first_part = gz / y
first_part = cast(gz / y, x.type.dtype)
else:
first_part = None
if y.type in grad_types:
second_part = -(gz * x) / (y * y)
second_part = cast(-(gz * x) / (y * y), y.type.dtype)
else:
second_part = None
return (first_part, second_part)
#return (gz / y if x.type in grad_types else None,
# -(gz * x) / (y * y) if y.type in grad_types else None)
return first_part, second_part
true_div = TrueDiv(upcast_out, name = 'true_div')
class IntDiv(BinaryScalarOp):
......@@ -811,19 +830,60 @@ second = Second(transfer_type(1), name = 'second')
class Identity(UnaryScalarOp):
def impl(self, x):
return x
def impl(self, input):
return input
def c_code(self, node, name, (x, ), (z, ), sub):
return "%(z)s = %(x)s;" % locals()
def grad(self, (x, ), (gz, )):
if x.type in grad_types:
return gz,
return gz,
else:
return None,
identity = Identity(same_out, name = 'identity')
#### CASTING OPERATIONS
class Cast(UnaryScalarOp):
    """Scalar Op that casts its input to the fixed output dtype `o_type`."""
    def __init__(self, o_type, name=None):
        if not isinstance(o_type, Scalar):
            raise TypeError(o_type)
        super(Cast, self).__init__(specific_out(o_type), name=name)
        self.o_type = o_type
        # numpy scalar constructor for the target dtype, e.g. numpy.float32
        self.ctor = getattr(numpy, o_type.dtype)
    def impl(self, input):
        # Python-mode implementation: convert via the numpy scalar type.
        return self.ctor(input)
    def c_code(self, node, name, (x, ), (z, ), sub):
        # C implementation is a plain assignment: the C variable for z already
        # has the target dtype, so the conversion happens implicitly.
        return "%(z)s = %(x)s;" % locals()
    def grad(self, (x, ), (gz, )):
        # Cast the gradient back to the input's dtype; inputs whose type is
        # not in `grad_types` get no gradient.
        if x.type in grad_types:
            return [cast(gz, x.type.dtype)]
        else:
            return None,
#backport
#return gz if x.type in grad_types else None,
identity = Identity(same_out, name = 'identity')
convert_to_int8 = Cast(int8, name='convert_to_int8')
convert_to_int16 = Cast(int16, name='convert_to_int16')
convert_to_int32 = Cast(int32, name='convert_to_int32')
convert_to_int64 = Cast(int64, name='convert_to_int64')
convert_to_float32 = Cast(float32, name='convert_to_float32')
convert_to_float64 = Cast(float64, name='convert_to_float64')
convert_to_complex64 = Cast(complex64, name='convert_to_complex64')
convert_to_complex128 = Cast(complex128, name='convert_to_complex128')
_cast_mapping = {'int8': convert_to_int8,
'int16': convert_to_int16,
'int32': convert_to_int32,
'int64': convert_to_int64,
'float32': convert_to_float32,
'float64': convert_to_float64,
'complex64': convert_to_complex64,
'complex128': convert_to_complex128}
def cast(x, dtype):
    """Symbolically cast `x` to a Scalar of the given `dtype`.

    Returns the scalar Variable unchanged when it already has `dtype`;
    refuses complex -> real casts because they are ambiguous.
    """
    scalar_x = as_scalar(x)
    src_dtype = scalar_x.type.dtype
    if src_dtype == dtype:
        return scalar_x
    if src_dtype.startswith('complex') and not dtype.startswith('complex'):
        raise TypeError('Casting from complex to real is ambiguous: consider real(), imag(), angle() or abs()')
    return _cast_mapping[dtype](scalar_x)
class Abs(UnaryScalarOp):
def make_node(self, x):
......@@ -883,8 +943,6 @@ class Neg(UnaryScalarOp):
return -gz,
else:
return None,
#backport
#return -gz if x.type in grad_types else None,
def c_code(self, node, name, (x, ), (z, ), sub):
return "%(z)s = -%(x)s;" % locals()
neg = Neg(same_out, name = 'neg')
......
......@@ -465,9 +465,16 @@ class TensorType(Type):
def c_libraries(self):
return []
def c_support_code(cls):
def c_support_code(self):
"""Override `CLinkerOp.c_support_code` """
return scal.Scalar("int8").c_support_code()
return scal.Scalar(self.dtype).c_support_code()
def c_code_cache_version(self):
scalar_version = scal.Scalar(self.dtype).c_code_cache_version()
if scalar_version:
return (1,) + scalar_version
else:
return ()
# Easy constructors
......@@ -887,18 +894,6 @@ class ScalarFromTensor(Op):
scalar_from_tensor = ScalarFromTensor()
@constructor
def cast(t, dtype):
mapping = {'int8': convert_to_int8,
'int16': convert_to_int16,
'int32': convert_to_int32,
'int64': convert_to_int64,
'float32': convert_to_float32,
'float64': convert_to_float64,
'complex64': convert_to_complex64,
'complex128': convert_to_complex128}
return mapping[dtype](t)
#to be removed as we get the epydoc routine-documenting thing going -JB 20080924
def _conversion(real_value, name):
__oplist_tag(real_value, 'casting')
......@@ -906,30 +901,52 @@ def _conversion(real_value, name):
pprint.assign(real_value, printing.FunctionPrinter(name))
return real_value
convert_to_int8 = _conversion(elemwise.Elemwise(scal.Identity(scal.specific_out(scal.int8))), 'int8')
#
# These _conver_to_<type> functions have leading underscores to indicate that they should not
# be called directly. They do not perform sanity checks about what types you are casting to
# what. That logic is implemented by the `cast()` function below.
#
_convert_to_int8 = _conversion(elemwise.Elemwise(scal.convert_to_int8), 'int8')
"""Cast to 8-bit integer"""
convert_to_int16 = _conversion(elemwise.Elemwise(scal.Identity(scal.specific_out(scal.int16))), 'int16')
_convert_to_int16 = _conversion(elemwise.Elemwise(scal.convert_to_int16), 'int16')
"""Cast to 16-bit integer"""
convert_to_int32 = _conversion(elemwise.Elemwise(scal.Identity(scal.specific_out(scal.int32))), 'int32')
_convert_to_int32 = _conversion(elemwise.Elemwise(scal.convert_to_int32), 'int32')
"""Cast to 32-bit integer"""
convert_to_int64 = _conversion(elemwise.Elemwise(scal.Identity(scal.specific_out(scal.int64))), 'int64')
_convert_to_int64 = _conversion(elemwise.Elemwise(scal.convert_to_int64), 'int64')
"""Cast to 64-bit integer"""
convert_to_float32 = _conversion(elemwise.Elemwise(scal.Identity(scal.specific_out(scal.float32))), 'float32')
_convert_to_float32 = _conversion(elemwise.Elemwise(scal.convert_to_float32), 'float32')
"""Cast to single-precision floating point"""
convert_to_float64 = _conversion(elemwise.Elemwise(scal.Identity(scal.specific_out(scal.float64))), 'float64')
_convert_to_float64 = _conversion(elemwise.Elemwise(scal.convert_to_float64), 'float64')
"""Cast to double-precision floating point"""
convert_to_complex64 = _conversion(elemwise.Elemwise(scal.Identity(scal.specific_out(scal.complex64))), 'complex64')
_convert_to_complex64 = _conversion(elemwise.Elemwise(scal.convert_to_complex64), 'complex64')
"""Cast to single-precision complex"""
convert_to_complex128 = _conversion(elemwise.Elemwise(scal.Identity(scal.specific_out(scal.complex128))), 'complex128')
_convert_to_complex128 = _conversion(elemwise.Elemwise(scal.convert_to_complex128), 'complex128')
"""Cast to double-precision complex"""
_cast_mapping = {'int8': _convert_to_int8,
'int16': _convert_to_int16,
'int32': _convert_to_int32,
'int64': _convert_to_int64,
'float32': _convert_to_float32,
'float64': _convert_to_float64,
'complex64': _convert_to_complex64,
'complex128': _convert_to_complex128}
@constructor
def cast(x, dtype):
"""Symbolically cast `x` to a Tensor of type `dtype`."""
if x.type.dtype.startswith('complex') and not dtype.startswith('complex'):
raise TypeError('Casting from complex to real is ambiguous: consider real(), imag(), angle() or abs()')
return _cast_mapping[dtype](x)
##########################
......@@ -1145,7 +1162,6 @@ def abs_(a):
pprint.assign(abs_, printing.PatternPrinter(('|%(0)s|', -1000)))
@_scal_elemwise
def exp(a):
"""e^`a`"""
......@@ -1210,6 +1226,83 @@ def sinh(a):
def tanh(a):
"""hyperbolic tangent of a"""
class Real(Op):
"""Extract the real elements of a complex ndarray"""
view_map = {0:[0]}
def __eq__(self, other):
return type(self) == type(other)
def __hash__(self):
return hash(type(self))
def make_node(self, x):
_x = as_tensor(x)
y_dtype = _x.type.dtype
if y_dtype == 'complex64':
y_dtype = 'float32'
if y_dtype == 'complex128':
y_dtype = 'float64'
_y = Tensor(y_dtype, _x.type.broadcastable)()
return Apply(self, [_x], [_y])
def perform(self, node, (x,), (y,)):
if str(x.dtype).startswith('complex'):
y[0] = x.real
else:
y[0] = x
def grad(self, inputs, (g_y,)):
#TODO: waiting on a Complex(real=, imag=) op that can merge
#things back into a complex tensor
raise NotImplementedError()
_real = Real()
@constructor
def real(x):
    """Return the real part of real or complex-valued `x`.

    For real-valued `x`, `x` itself is returned (as a tensor Variable).
    """
    var = as_tensor_variable(x)
    if not var.type.dtype.startswith('complex'):
        return var
    return _real(x)
class Imag(Op):
"""Extract the imaginary elements of a complex ndarray"""
view_map = {0:[0]}
def __eq__(self, other):
return type(self) == type(other)
def __hash__(self):
return hash(type(self))
def make_node(self, x):
_x = as_tensor_variable(x)
if not _x.type.dtype.startswith('complex'):
raise TypeError('Imag(x) requires complex x', x)
if _x.type.dtype == 'complex64': y_dtype = 'float32'
elif _x.type.dtype == 'complex128': y_dtype = 'float64'
else:
raise NotImplementedError('what is this?', y_dtype)
_y = Tensor(y_dtype, _x.type.broadcastable)()
return Apply(self, [_x], [_y])
def perform(self, node, (x,), (y,)):
if str(x.dtype).startswith('complex'):
y[0] = x.imag
else:
y[0] = x * 0
def grad(self, inputs, (g_y,)):
# TODO: waiting on a complex(real=, imag=) op that can merge
# things back into a complex tensor
raise NotImplementedError()
_imag = Imag()
@constructor
def imag(x):
    """Return the imaginary part of real or complex-valued `x`.

    For real-valued `x` this returns `zeros_like(x)`.
    """
    var = as_tensor_variable(x)
    if var.type.dtype.startswith('complex'):
        return _imag(x)
    return zeros_like(x)
##########################
# Misc
......
......@@ -255,6 +255,8 @@ class GemmRelated(Op):
self.case_double_gemm,
self.end_switch_typenum), '')
def build_gemm_version(self):
return (1,)
class Gemm(GemmRelated):
"""In-place version of matrix-matrix multiplication (with accumulation):
......@@ -363,7 +365,14 @@ class Gemm(GemmRelated):
def c_code(self, node, name, (_z, _a, _x, _y, _b), (_zout, ), sub): #DEBUG
full_code = self.build_gemm_call() % dict(locals(), **sub)
return full_code
def c_code_cache_version(self):
return (1,) + self.build_gemm_version()
gemm = Gemm()
pprint.assign(gemm, FunctionPrinter('gemm'))
def res_is_a(node, op, maxclients=None):
......@@ -635,6 +644,9 @@ class Dot22(GemmRelated):
def c_code(self, node, name, (_x, _y), (_z, ), sub): #DEBUG
full_code = self.build_gemm_call() % dict(locals(), **sub)
return full_code
def c_code_cache_version(self):
return (1,) + self.build_gemm_version()
_dot22 = Dot22()
@local_optimizer([T.dot])
......
......@@ -295,6 +295,9 @@ class DimShuffle(Op):
return full_code % dict(locals(), **sub)
def c_code_cache_version(self):
return (1,)
def grad(self, (x, ), (gz, )):
gz = as_tensor_variable(gz)
grad_order = ['x'] * len(x.type.broadcastable)
......@@ -487,7 +490,8 @@ class Elemwise(Op):
return self.name
def grad(self, inputs, ograds):
ograds = map(as_tensor_variable, ograds) # this shouldn't be necessary...
# Gradients (especially on the final costs) don't have to be symbolic
ograds = map(as_tensor_variable, ograds)
scalar_inputs = [Scalar(dtype = t.type.dtype)() for t in inputs]
scalar_ograds = [Scalar(dtype = ograd.type.dtype)() for ograd in ograds]
scalar_igrads = self.scalar_op.grad(scalar_inputs, scalar_ograds)
......@@ -695,8 +699,20 @@ class Elemwise(Op):
def c_support_code(self):
return self.scalar_op.c_support_code()
def c_code_cache_version(self):
    # Version of the node-independent C code emitted by this Op; the
    # node-dependent version is computed by c_code_cache_version_apply.
    return (4,)
def c_code_cache_version_apply(self, node):
    """Compute a cache version for the C code of this particular apply node.

    Combines this Op's own code version with the version of the wrapped
    scalar op (evaluated on a scalar Apply mirroring `node`'s dtypes) and of
    every input/output dtype; returns () (uncached) if any piece is
    unversioned.
    """
    version = [4] # the version corresponding to the c code in this Op
    # now we insert versions for the ops on which we depend...
    scalar_node = Apply(self.scalar_op,
        [Scalar(dtype = input.type.dtype)() for input in node.inputs],
        [Scalar(dtype = output.type.dtype)() for output in node.outputs])
    version.extend(self.scalar_op.c_code_cache_version_apply(scalar_node))
    for i in node.inputs + node.outputs:
        version.extend(Scalar(dtype=i.type.dtype).c_code_cache_version())
    # a falsy component anywhere means "unversioned": propagate that
    if all(version):
        return tuple(version)
    else:
        return ()
# def elemwise_to_scal(env):
# mapping = {}
......@@ -884,6 +900,21 @@ class CAReduce(Op):
code = "\n".join(self._c_all(node, name, inames, onames, sub))
return code
def c_code_cache_version_apply(self, node):
    """Compute a cache version for the C code of this particular apply node.

    Same scheme as `Elemwise.c_code_cache_version_apply`: combine this Op's
    code version with the reduced scalar op's version and every input/output
    dtype version; return () (uncached) if any component is unversioned.
    """
    version = [2] # the version corresponding to the c code in this Op
    # now we insert versions for the ops on which we depend...
    scalar_node = Apply(self.scalar_op,
        [Scalar(dtype = input.type.dtype)() for input in node.inputs],
        [Scalar(dtype = output.type.dtype)() for output in node.outputs])
    version.extend(self.scalar_op.c_code_cache_version_apply(scalar_node))
    for i in node.inputs + node.outputs:
        version.extend(Scalar(dtype=i.type.dtype).c_code_cache_version())
    # a falsy component anywhere means "unversioned": propagate that
    if all(version):
        return tuple(version)
    else:
        return ()
class Sum(CAReduce):
"""
......
## This file contain ops that are not currently integrated in the core of threano.
## Not all of those ops have been thoroughly tested.
"""Provides neural-network specific Ops.
:note: TODO: factor this out into a neural-network toolbox.
"""
#from theano import tensor, scalar
from theano import gof
from theano import scalar
from theano import printing
......@@ -39,6 +40,8 @@ class ScalarSigmoid(scalar.UnaryScalarOp):
? 1.0
: 1.0 /(1.0+exp(-%(x)s));""" % locals()
raise NotImplementedError('only floatingpoint is implemented')
def c_code_cache_version(self):
return (1,)
scalar_sigmoid = ScalarSigmoid(scalar.upgrade_to_float, name='scalar_sigmoid')
sigmoid = elemwise.Elemwise(scalar_sigmoid, name='sigmoid')
......@@ -66,6 +69,8 @@ class ScalarSoftplus(scalar.UnaryScalarOp):
? %(x)s
: log1p(exp(%(x)s));""" % locals()
raise NotImplementedError('only floating point x is implemented')
def c_code_cache_version(self):
return (1,)
scalar_softplus = ScalarSoftplus(scalar.upgrade_to_float, name='scalar_softplus')
softplus = elemwise.Elemwise(scalar_softplus, name='softplus')
......@@ -133,7 +138,7 @@ class SoftmaxWithBias(gof.Op):
return ['<iostream>','<cmath>']
def c_code_cache_version(self):
return ()
return (3,)
@staticmethod
def c_code_template():
# this implementation was lifted from
......@@ -294,7 +299,7 @@ class SoftmaxGrad(gof.Op):
raise NotImplementedError()
def c_code_cache_version(self):
return ()
return (3,)
def c_code(self, node, name, (dy, sm), (dx,), sub):
return '''
if ((%(dy)s->descr->type_num != PyArray_DOUBLE) && (%(dy)s->descr->type_num != PyArray_FLOAT))
......@@ -402,10 +407,15 @@ def local_softmax_with_bias(node):
non_vectors = []
for x_in in x.owner.inputs:
if list(x_in.type.broadcastable) == [True, False]:
if x_in.owner and isinstance(x_in.owner.op, tensor.DimShuffle):
assert len(x_in.owner.inputs)==1
print isinstance(x_in.owner.op, tensor.DimShuffle)
#since specialization comes relatively late in optimization,
# we don't want to put in extra DimShuffles un-necessarily.
if x_in.owner and isinstance(x_in.owner.op, tensor.DimShuffle)\
and list(x_in.owner.inputs[0].type.broadcastable)==[False]:
# cut out the DimShuffle that was broadcasting a vector
vectors.append(x_in.owner.inputs[0])
else:
# insert an extra DimShuffle to correct the old one
vectors.append(tensor.DimShuffle((True, False), (1,))(x_in))
else:
non_vectors.append(x_in)
......@@ -627,7 +637,7 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
def c_code_cache_version(self):
return ()
return (2,)
def c_code(self, node, name, (x, b, y_idx), (nll, sm, am), sub):
y_idx_type = node.inputs[2].type.dtype_specs()[1]
am_type = y_idx_type
......@@ -659,7 +669,7 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
def grad(self, *args):
raise NotImplementedError()
def c_code_cache_version(self):
return ()
return (2,)
def c_code(self, node, name, (dnll, sm, y_idx), (dx,), sub):
y_idx_type = node.inputs[2].type.dtype_specs()[1]
return """
......
......@@ -594,19 +594,6 @@ class T_Shape(unittest.TestCase):
s = shape(numpy.ones((5, 3, 10)))
self.failUnless((eval_outputs([s]) == [5, 3, 10]).all())
class T_Cast(unittest.TestCase):
def test_basic(self):
for type1 in ['int8', 'int16', 'int32', 'int64', 'float32', 'float64']:
x = TensorType(dtype = type1, broadcastable = (False, )).make_variable()
for type2, converter in zip(['int8', 'int16', 'int32', 'int64', 'float32', 'float64'],
[convert_to_int8, convert_to_int16, convert_to_int32, convert_to_int64,
convert_to_float32, convert_to_float64]):
y = converter(x)
f = inplace_func([compile.In(x, strict = True)], y)
a = numpy.arange(10, dtype = type1)
b = f(a)
self.failUnless(numpy.all(b == numpy.arange(10, dtype = type2)))
class T_max_and_argmax(unittest.TestCase):
def setUp(self):
utt.seed_rng()
......@@ -1920,43 +1907,6 @@ def test_sum_overflow():
f = function([a], sum(a))
assert f([1]*300) == 300
def test_convert_to_complex():
a = value(numpy.ones(3, dtype='complex64')+0.5j)
b = value(numpy.ones(3, dtype='complex128')+0.5j)
f = function([a],basic.convert_to_complex128(a))
#we need to compare with the same type.
assert a.type.values_eq_approx(b.data, f(a.data))
f = function([b],basic.convert_to_complex128(b))
assert b.type.values_eq_approx(b.data, f(b.data))
f = function([a],basic.convert_to_complex64(a))
assert a.type.values_eq_approx(a.data, f(a.data))
#down cast don,t work for now
#f = function([b],basic.convert_to_complex64(b))
#assert b.type.values_eq_approx(b.data, f(b.data))
for nbits in (64, 128):
for t in ['int8','int16','int32','int64','float32','float64']:
a = value(numpy.ones(3, dtype=t))
b = value(numpy.ones(3, dtype='complex128'))
f = function([a],basic.convert_to_complex128(a))
assert a.type.values_eq_approx(b.data, f(a.data))
for t in ['int8','int16','int32','int64','float32']:
a = value(numpy.ones(3, dtype=t))
b = value(numpy.ones(3, dtype='complex64'))
f = function([a],basic.convert_to_complex64(a))
assert a.type.values_eq_approx(b.data, f(a.data))
#this work, but should we allow it? How well it is implemented?
for t in ['float64']:
a = value(numpy.ones(3, dtype=t))
b = value(numpy.ones(3, dtype='complex64'))
f = function([a],basic.convert_to_complex64(a))
assert a.type.values_eq_approx(b.data, f(a.data))
def test_default():
x, y = dscalars('xy')
z = default(x, y)
......@@ -1974,16 +1924,6 @@ def test_default_state():
f['x'] = None
assert f(1) == 4.8
assert f(2.2) == 7
def test_bug_complext_10_august_09():
v0 = dmatrix()
v1 = basic.convert_to_complex128(v0)
inputs = [v0]
outputs = [v1]
f = function(inputs, outputs)
i = numpy.zeros((2,2))
assert (f(i)==numpy.zeros((2,2))).all()
if __name__ == '__main__':
if len(sys.argv) >= 2 and sys.argv[1] == 'OPT':
......
import unittest
from theano import function
from theano.tensor.basic import (_convert_to_int32, _convert_to_int8, _convert_to_int16,
_convert_to_int64, _convert_to_float32, _convert_to_float64)
from theano.tensor import *
class test_casting(unittest.TestCase):
def test_0(self):
for op_fn in _convert_to_int32, _convert_to_float32, _convert_to_float64:
for type_fn in bvector, ivector, fvector, dvector:
x = type_fn()
f = function([x], op_fn(x))
xval = numpy.asarray(numpy.random.rand(10)*10, dtype=type_fn.dtype)
yval = f(xval)
assert str(yval.dtype) == op_fn.scalar_op.output_types_preference.spec[0].dtype
def test_illegal(self):
try:
x = zmatrix()
function([x], cast(x, 'float64'))(numpy.ones((2,3), dtype='complex128'))
except TypeError:
return
assert 0
def test_basic(self):
for type1 in ['int8', 'int16', 'int32', 'int64', 'float32', 'float64']:
x = TensorType(dtype = type1, broadcastable = (False, )).make_variable()
for type2, converter in zip(['int8', 'int16', 'int32', 'int64', 'float32', 'float64'],
[_convert_to_int8, _convert_to_int16,
_convert_to_int32, _convert_to_int64,
_convert_to_float32, _convert_to_float64]):
y = converter(x)
f = function([compile.In(x, strict = True)], y)
a = numpy.arange(10, dtype = type1)
b = f(a)
self.failUnless(numpy.all(b == numpy.arange(10, dtype = type2)))
def test_convert_to_complex(self):
a = value(numpy.ones(3, dtype='complex64')+0.5j)
b = value(numpy.ones(3, dtype='complex128')+0.5j)
f = function([a],basic._convert_to_complex128(a))
#we need to compare with the same type.
assert a.type.values_eq_approx(b.data, f(a.data))
f = function([b],basic._convert_to_complex128(b))
assert b.type.values_eq_approx(b.data, f(b.data))
f = function([a],basic._convert_to_complex64(a))
assert a.type.values_eq_approx(a.data, f(a.data))
f = function([b],basic._convert_to_complex64(b))
assert b.type.values_eq_approx(a.data, f(b.data))
for nbits in (64, 128):
# upcasting to complex128
for t in ['int8','int16','int32','int64','float32','float64']:
a = value(numpy.ones(3, dtype=t))
b = value(numpy.ones(3, dtype='complex128'))
f = function([a],basic._convert_to_complex128(a))
assert a.type.values_eq_approx(b.data, f(a.data))
# upcasting to complex64
for t in ['int8','int16','int32','int64','float32']:
a = value(numpy.ones(3, dtype=t))
b = value(numpy.ones(3, dtype='complex64'))
f = function([a],basic._convert_to_complex64(a))
assert a.type.values_eq_approx(b.data, f(a.data))
# downcast to complex64
for t in ['float64']:
a = value(numpy.ones(3, dtype=t))
b = value(numpy.ones(3, dtype='complex64'))
f = function([a],basic._convert_to_complex64(a))
assert a.type.values_eq_approx(b.data, f(a.data))
def test_bug_complext_10_august_09(self):
v0 = dmatrix()
v1 = basic._convert_to_complex128(v0)
inputs = [v0]
outputs = [v1]
f = function(inputs, outputs)
i = numpy.zeros((2,2))
assert (f(i)==numpy.zeros((2,2))).all()
import unittest
import theano
from theano.tensor import *
class TestRealImag(unittest.TestCase):
def test0(self):
x= zvector()
rng = numpy.random.RandomState(23)
xval = numpy.asarray(list(numpy.complex(rng.randn(), rng.randn()) for i in xrange(10)))
assert numpy.all( xval.real == theano.function([x], real(x))(xval))
assert numpy.all( xval.imag == theano.function([x], imag(x))(xval))
def test_on_real_input(self):
x= dvector()
rng = numpy.random.RandomState(23)
xval = rng.randn(10)
assert numpy.all( 0 == theano.function([x], imag(x))(xval))
assert numpy.all( xval == theano.function([x], real(x))(xval))
def test_cast(self):
x= zvector()
self.failUnlessRaises(TypeError, cast, x, 'int32')
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论