merge

0d90f455 · James Bergstra · ce329333 · 8464c20d · 0d90f455 · 0d90f455
--- a/theano/compile/sandbox/__init__.py
+++ b/theano/compile/sandbox/__init__.py
-from .sharedvalue import shared
+from .sharedvalue import shared, shared_constructor
 from .pfunc import pfunc
--- a/theano/scalar/basic.py
+++ b/theano/scalar/basic.py
@@ -187,28 +187,28 @@ class Scalar(Type):
         };
         """
        operator_eq = """
-        template <> %(mytype)s & %(mytype)s::operator =(const npy_int8 & y)
+        template <> %(mytype)s & %(mytype)s::operator=<npy_int8>(const npy_int8 & y)
        { this->real=y; this->imag=0; return *this; }
-        template <> %(mytype)s & %(mytype)s::operator =(const npy_int16 & y)
+        template <> %(mytype)s & %(mytype)s::operator=<npy_int16>(const npy_int16 & y)
        { this->real=y; this->imag=0; return *this; }
-        template <> %(mytype)s & %(mytype)s::operator =(const npy_int32 & y)
+        template <> %(mytype)s & %(mytype)s::operator=<npy_int32>(const npy_int32 & y)
        { this->real=y; this->imag=0; return *this; }
-        template <> %(mytype)s & %(mytype)s::operator =(const npy_int64 & y)
+        template <> %(mytype)s & %(mytype)s::operator=<npy_int64>(const npy_int64 & y)
        { this->real=y; this->imag=0; return *this; }
-        template <> %(mytype)s & %(mytype)s::operator =(const npy_float32 & y)
+        template <> %(mytype)s & %(mytype)s::operator=<npy_float32>(const npy_float32 & y)
        { this->real=y; this->imag=0; return *this; }
-        template <> %(mytype)s & %(mytype)s::operator =(const npy_float64 & y)
+        template <> %(mytype)s & %(mytype)s::operator=<npy_float64>(const npy_float64 & y)
        { this->real=y; this->imag=0; return *this; }
-        template <> %(mytype)s & %(mytype)s::operator =(const theano_complex128 & y)
+        template <> %(mytype)s & %(mytype)s::operator=<theano_complex128>(const theano_complex128 & y)
        { this->real=y.real; this->imag=y.imag; return *this; }
-        template <> %(mytype)s & %(mytype)s::operator =(const theano_complex64 & y)
+        template <> %(mytype)s & %(mytype)s::operator=<theano_complex64>(const theano_complex64 & y)
        { this->real=y.real; this->imag=y.imag; return *this; }
        """
@@ -219,7 +219,8 @@ class Scalar(Type):
                + operator_eq % dict(mytype='theano_complex64')
    def c_code_cache_version(self):
-        return (2,)
+        return (3,)  #explicit T given in specialization of operator= lines.  This makes it compile with open64
+        # previous cache version was (2,)
 int8 = Scalar('int8')
@@ -666,10 +667,10 @@ class Mul(ScalarOp):
      retval = []
      for input in inputs:
        if input.type in grad_types:
-          retval += [mul(*([gz] + utils.difference(inputs, [input])))]
+          retval += [cast(mul(*([gz] + utils.difference(inputs, [input]))), input.type.dtype)]
        else:
          retval += [None]
      return retval
        #return [(mul(*([gz] + utils.difference(inputs, [input]))) 

--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
@@ -1417,15 +1417,19 @@ def mean(input, axis = None):
    if str(input.dtype).startswith('int'):
        # we need to cast eventually anyway, and this helps
        # to prevent overflow
-        input = convert_to_float64(input)
+        input = cast(input, 'float64')
    s = sum(input, axis)
    shp = shape(input)
+    if input.dtype == 'float32':
+        shp = cast(shp, 'float32')
    if axis is None:
        axis = range(input.type.ndim)
    elif isinstance(axis, int):
        axis = [axis]
    for i in axis:
        s = s / shp[i]
+    if input.dtype.startswith('float'):
+        assert input.dtype == s.dtype
    return s
 @constructor
@@ -2543,12 +2547,15 @@ class Dot(Op):
    def grad(self, (x, y), (gz,)):
        if gz.type.ndim == 0:
-            return gz * y, gz * x
+            rval = gz * y, gz * x
-        if x.type.ndim == 1 and y.type.ndim > 1:
+        elif x.type.ndim == 1 and y.type.ndim > 1:
-            return dot(gz, y.T), outer(x.T, gz)
+            rval = dot(gz, y.T), outer(x.T, gz)
-        if x.type.ndim > 1 and y.type.ndim == 1:
+        elif x.type.ndim > 1 and y.type.ndim == 1:
-            return outer(gz, y.T), dot(x.T, gz) 
+            rval = outer(gz, y.T), dot(x.T, gz) 
-        return dot(gz, y.T), dot(x.T, gz)
+        else:
+            rval = dot(gz, y.T), dot(x.T, gz)
+        return cast(rval[0], x.dtype), cast(rval[1], y.dtype)
    def __str__(self):
        return "dot"
 dot = Dot()

--- a/theano/tensor/blas.py
+++ b/theano/tensor/blas.py
 """Ops and optimizations for using BLAS function calls to evaluate linear algebra expressions"""
-import os, sys, traceback
+import os, sys, traceback, logging
 import numpy
 from theano.gof import (utils, Op, Apply, view_roots, PatternSub, DestroyHandler, 
@@ -17,6 +17,13 @@ from theano import compile  #to register the optimizer built by this file
 from theano.tensor.blas_headers import cblas_header_text, blas_header_text
+_logger = logging.getLogger('theano.tensor.blas')
+def debug(*msg): _logger.debug(' '.join(str(m) for m in msg))
+def info(*msg): _logger.info(' '.join(str(m) for m in msg))
+def warn(*msg): _logger.warn(' '.join(str(m) for m in msg))
+def warning(*msg): _logger.warning(' '.join(str(m) for m in msg))
+def error(*msg): _logger.error(' '.join(str(m) for m in msg))
 @utils.memoize
 def ldflags(libs=True, flags=False):
    """Return a list of libraries against which an Op's object file should be
@@ -655,6 +662,8 @@ def local_dot_to_dot22(node):
        x,y = node.inputs
        if _is_real_matrix(x) and y.type == x.type:
            return [_dot22(*node.inputs)]
+        else:
+            info('Not optimizing dot with inputs', x, y)
    else:
        return False
 register_specialize(local_dot_to_dot22)