提交 6736be29 — 作者: Olivier Delalleau

Merged

......@@ -131,7 +131,7 @@ optdb.register('merge1', gof.MergeOptimizer(),
0, 'fast_run', 'fast_compile')
optdb.register('canonicalize', gof.EquilibriumDB(), # rearranges elemwise expressions
1, 'fast_run')
optdb.register('merge1.2', gof.MergeOptimizer(skip_const_merge=True),
optdb.register('merge1.2', gof.MergeOptimizer(skip_const_merge=False),
1.2, 'fast_run', 'fast_compile')
optdb.register('stabilize', gof.EquilibriumDB(), # replace unstable subgraphs
1.5, 'fast_run')
......
......@@ -956,21 +956,26 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key)
CudaNdarray * self = (CudaNdarray*) py_self;
PyObject * py_rval = NULL;
CudaNdarray * rval = NULL;
PyObject * intobj = NULL;
//PyObject_Print(key, stderr, 0);
if (key == Py_Ellipsis)
{
Py_INCREF(py_self);
return py_self;
}
else if (PyInt_Check(key)) //INDEXING BY INTEGER
if ((intobj=PyNumber_Int(key))) //INDEXING BY INTEGER
//else if (PyInt_Check(key)) //INDEXING BY INTEGER
{
int d_idx = PyInt_AsLong(intobj);
Py_DECREF(intobj); intobj=NULL;
//int d_idx = PyInt_AsLong(key);
if (self->nd == 0)
{
PyErr_SetString(PyExc_NotImplementedError, "index into 0-d array");
return NULL;
}
int d_idx = PyInt_AsLong(key);
int d_dim = CudaNdarray_HOST_DIMS(self)[0];
int offset = 0;
......@@ -1009,7 +1014,11 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key)
CudaNdarray_set_dim(rval, d-1, CudaNdarray_HOST_DIMS(self)[d]);
}
}
else if (PySlice_Check(key)) //INDEXING BY SLICE
else
{
PyErr_Clear();
}
if (PySlice_Check(key)) //INDEXING BY SLICE
{
if (self->nd == 0)
{
......@@ -1057,7 +1066,7 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key)
CudaNdarray_set_dim(rval, d, CudaNdarray_HOST_DIMS(self)[d]);
}
}
else if (PyTuple_Check(key)) //INDEXING BY TUPLE
if (PyTuple_Check(key)) //INDEXING BY TUPLE
{
//elements of the tuple can be either integers or slices
//the dimensionality of the view we will return is diminished for each slice in the tuple
......@@ -1127,9 +1136,11 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key)
}
++rval_d;
}
else if (PyInt_Check(key_d))
else if ((intobj=PyNumber_Int(key_d)))
{
int d_idx = PyInt_AsLong(key_d);
int d_idx = PyInt_AsLong(intobj);
Py_DECREF(intobj);
intobj = NULL;
int d_dim = CudaNdarray_HOST_DIMS(self)[d];
if ((d_idx >= 0) && (d_idx < d_dim))
......@@ -1151,6 +1162,7 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key)
}
else
{
PyErr_Clear(); // clear the error set by PyNumber_Int
PyErr_SetString(PyExc_IndexError, "index must be either int or slice");
Py_DECREF(rval);
return NULL;
......@@ -1158,16 +1170,16 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key)
}
}
}
else
{
PyErr_SetString(PyExc_NotImplementedError, "Unknown key type");
return NULL;
}
if (py_rval)
{
if (verbose) fprint_CudaNdarray(stderr, self);
if (verbose) fprint_CudaNdarray(stderr, rval);
}
else
{
PyErr_SetString(PyExc_NotImplementedError, "Unknown key type");
return NULL;
}
return py_rval;
}
......@@ -1776,6 +1788,10 @@ int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, CudaNdarray * other)
}
size *= (unsigned int) CudaNdarray_HOST_DIMS(self)[i];
}
if (0 == size)
{
return 0; //nothing to copy, we're done.
}
if (CudaNdarray_is_c_contiguous(self) && CudaNdarray_is_c_contiguous(other))
{
cublasScopy(size, CudaNdarray_DEV_DATA(other), 1, CudaNdarray_DEV_DATA(self), 1);
......
差异被折叠。
......@@ -1257,7 +1257,7 @@ class Prepend_scalar_constant_to_each_row(gof.Op):
def __eq__(self, other):
    # Two Ops compare equal only when they are the same concrete class
    # and wrap the same constant value.  The type check short-circuits,
    # so `other.val` is never touched for foreign types.
    same_type = type(self) == type(other)
    return same_type and (self.val == other.val)
def __hash__(self):
    # Hash mixes the Op's type with the wrapped constant's value so that
    # it stays consistent with __eq__ above.
    # NOTE(review): the next two lines are the before/after pair of a diff
    # hunk (`.value` was replaced by `.data`); only the second line is the
    # current code -- as written, the first return makes it unreachable.
    return tensor.hashtype(self) ^ hash(self.val.value)
    return tensor.hashtype(self) ^ hash(self.val.data)
def __str__(self):
    # Render as ClassName{val}; the format string is kept verbatim so the
    # printed graph labels are unchanged.
    cls_name = self.__class__.__name__
    return '%s{%s}' % (cls_name, self.val)
......
......@@ -610,6 +610,43 @@ def local_alloc_unary(node):
return [T.alloc(T.cast(v, node.outputs[0].dtype), *shp)]
############################
# Constant Canonicalization
############################
@register_canonicalize
@gof.local_optimizer([])
def local_upcast_elemwise_constant_inputs(node):
    """Explicitly upcast constant inputs to elemwise Ops when those Ops
    would upcast them implicitly anyway.

    Rationale: it helps merge expressions such as (1 - x) and (1.0 - x),
    which otherwise differ only in the dtype of the constant.

    :param node: an Apply node to consider for rewriting.
    :returns: a one-element list with the replacement output, or None
        when the rewrite does not apply (non-Elemwise op, no implicit
        upcasting, or no input changed).
    """
    if not isinstance(node.op, T.Elemwise):
        return
    scalar_op = node.op.scalar_op
    # Only these output-type preferences perform implicit upcasting, so
    # only then is it safe to upcast the inputs ourselves.
    if scalar_op.output_types_preference not in (T.scal.upgrade_to_float,
                                                 T.scal.upcast_out):
        return
    output_dtype = node.outputs[0].type.dtype
    new_inputs = []
    for i in node.inputs:
        if i.type.dtype == output_dtype:
            new_inputs.append(i)
            continue
        try:
            # get_constant_value works only for scalar constants; it
            # raises when `i` is not one.
            cval_i = get_constant_value(i)
            new_inputs.append(T.cast(cval_i, output_dtype))
        except Exception:
            # Was a bare `except:`; narrowed so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            if isinstance(i, T.TensorConstant):
                # Non-scalar constant: cast the tensor constant itself.
                new_inputs.append(T.cast(i, output_dtype))
            else:
                new_inputs.append(i)
    if new_inputs != node.inputs:
        return [node.op(*new_inputs)]
##################
# Subtensor opts #
##################
......@@ -1717,6 +1754,7 @@ def local_greedy_distributor(node):
return [rval]
register_canonicalize(local_greedy_distributor)
register_stabilize(local_greedy_distributor)
......@@ -1748,6 +1786,7 @@ def constant_folding(node):
return msg
register_canonicalize(constant_folding)
register_stabilize(constant_folding) # because
register_specialize(constant_folding)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
请先注册或登录后发表评论