提交 6736be29 — 作者: Olivier Delalleau

Merged

......@@ -131,7 +131,7 @@ optdb.register('merge1', gof.MergeOptimizer(),
0, 'fast_run', 'fast_compile')
optdb.register('canonicalize', gof.EquilibriumDB(), # rearranges elemwise expressions
1, 'fast_run')
optdb.register('merge1.2', gof.MergeOptimizer(skip_const_merge=True),
optdb.register('merge1.2', gof.MergeOptimizer(skip_const_merge=False),
1.2, 'fast_run', 'fast_compile')
optdb.register('stabilize', gof.EquilibriumDB(), # replace unstable subgraphs
1.5, 'fast_run')
......
......@@ -956,21 +956,26 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key)
CudaNdarray * self = (CudaNdarray*) py_self;
PyObject * py_rval = NULL;
CudaNdarray * rval = NULL;
PyObject * intobj = NULL;
//PyObject_Print(key, stderr, 0);
if (key == Py_Ellipsis)
{
Py_INCREF(py_self);
return py_self;
}
else if (PyInt_Check(key)) //INDEXING BY INTEGER
if ((intobj=PyNumber_Int(key))) //INDEXING BY INTEGER
//else if (PyInt_Check(key)) //INDEXING BY INTEGER
{
int d_idx = PyInt_AsLong(intobj);
Py_DECREF(intobj); intobj=NULL;
//int d_idx = PyInt_AsLong(key);
if (self->nd == 0)
{
PyErr_SetString(PyExc_NotImplementedError, "index into 0-d array");
return NULL;
}
int d_idx = PyInt_AsLong(key);
int d_dim = CudaNdarray_HOST_DIMS(self)[0];
int offset = 0;
......@@ -1009,7 +1014,11 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key)
CudaNdarray_set_dim(rval, d-1, CudaNdarray_HOST_DIMS(self)[d]);
}
}
else if (PySlice_Check(key)) //INDEXING BY SLICE
else
{
PyErr_Clear();
}
if (PySlice_Check(key)) //INDEXING BY SLICE
{
if (self->nd == 0)
{
......@@ -1057,7 +1066,7 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key)
CudaNdarray_set_dim(rval, d, CudaNdarray_HOST_DIMS(self)[d]);
}
}
else if (PyTuple_Check(key)) //INDEXING BY TUPLE
if (PyTuple_Check(key)) //INDEXING BY TUPLE
{
//elements of the tuple can be either integers or slices
//the dimensionality of the view we will return is diminished for each slice in the tuple
......@@ -1127,9 +1136,11 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key)
}
++rval_d;
}
else if (PyInt_Check(key_d))
else if ((intobj=PyNumber_Int(key_d)))
{
int d_idx = PyInt_AsLong(key_d);
int d_idx = PyInt_AsLong(intobj);
Py_DECREF(intobj);
intobj = NULL;
int d_dim = CudaNdarray_HOST_DIMS(self)[d];
if ((d_idx >= 0) && (d_idx < d_dim))
......@@ -1151,6 +1162,7 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key)
}
else
{
PyErr_Clear(); // clear the error set by PyNumber_Int
PyErr_SetString(PyExc_IndexError, "index must be either int or slice");
Py_DECREF(rval);
return NULL;
......@@ -1158,16 +1170,16 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key)
}
}
}
else
{
PyErr_SetString(PyExc_NotImplementedError, "Unknown key type");
return NULL;
}
if (py_rval)
{
if (verbose) fprint_CudaNdarray(stderr, self);
if (verbose) fprint_CudaNdarray(stderr, rval);
}
else
{
PyErr_SetString(PyExc_NotImplementedError, "Unknown key type");
return NULL;
}
return py_rval;
}
......@@ -1776,6 +1788,10 @@ int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, CudaNdarray * other)
}
size *= (unsigned int) CudaNdarray_HOST_DIMS(self)[i];
}
if (0 == size)
{
return 0; //nothing to copy, we're done.
}
if (CudaNdarray_is_c_contiguous(self) && CudaNdarray_is_c_contiguous(other))
{
cublasScopy(size, CudaNdarray_DEV_DATA(other), 1, CudaNdarray_DEV_DATA(self), 1);
......
差异被折叠。
......@@ -1257,7 +1257,7 @@ class Prepend_scalar_constant_to_each_row(gof.Op):
def __eq__(self, other):
    # Two Ops compare equal only when they are the same concrete class
    # and wrap the same constant value.  The type check short-circuits,
    # so `other.val` is never touched for foreign types.
    same_type = type(self) == type(other)
    return same_type and (self.val == other.val)
def __hash__(self):
    # Hash mixes the Op's type with the wrapped constant's value so that
    # it stays consistent with __eq__ above.
    # NOTE(review): the next two lines are the before/after pair of a diff
    # hunk (`.value` was replaced by `.data`); only the second line is the
    # current code -- as written, the first return makes it unreachable.
    return tensor.hashtype(self) ^ hash(self.val.value)
    return tensor.hashtype(self) ^ hash(self.val.data)
def __str__(self):
    # Render as ClassName{val}; the format string is kept verbatim so the
    # printed graph labels are unchanged.
    cls_name = self.__class__.__name__
    return '%s{%s}' % (cls_name, self.val)
......
......@@ -610,6 +610,43 @@ def local_alloc_unary(node):
return [T.alloc(T.cast(v, node.outputs[0].dtype), *shp)]
############################
# Constant Canonicalization
############################
@register_canonicalize
@gof.local_optimizer([])
def local_upcast_elemwise_constant_inputs(node):
    """Explicitly upcast constant inputs to elemwise Ops when those Ops
    would upcast them implicitly anyway.

    Rationale: it helps merge expressions such as (1 - x) and (1.0 - x),
    which otherwise differ only in the dtype of the constant.

    :param node: an Apply node to consider for rewriting.
    :returns: a one-element list with the replacement output, or None
        when the rewrite does not apply (non-Elemwise op, no implicit
        upcasting, or no input changed).
    """
    if not isinstance(node.op, T.Elemwise):
        return
    scalar_op = node.op.scalar_op
    # Only these output-type preferences perform implicit upcasting, so
    # only then is it safe to upcast the inputs ourselves.
    if scalar_op.output_types_preference not in (T.scal.upgrade_to_float,
                                                 T.scal.upcast_out):
        return
    output_dtype = node.outputs[0].type.dtype
    new_inputs = []
    for i in node.inputs:
        if i.type.dtype == output_dtype:
            new_inputs.append(i)
            continue
        try:
            # get_constant_value works only for scalar constants; it
            # raises when `i` is not one.
            cval_i = get_constant_value(i)
            new_inputs.append(T.cast(cval_i, output_dtype))
        except Exception:
            # Was a bare `except:`; narrowed so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            if isinstance(i, T.TensorConstant):
                # Non-scalar constant: cast the tensor constant itself.
                new_inputs.append(T.cast(i, output_dtype))
            else:
                new_inputs.append(i)
    if new_inputs != node.inputs:
        return [node.op(*new_inputs)]
##################
# Subtensor opts #
##################
......@@ -1717,6 +1754,7 @@ def local_greedy_distributor(node):
return [rval]
register_canonicalize(local_greedy_distributor)
register_stabilize(local_greedy_distributor)
......@@ -1748,6 +1786,7 @@ def constant_folding(node):
return msg
register_canonicalize(constant_folding)
register_stabilize(constant_folding) # because
register_specialize(constant_folding)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
请先注册或登录后发表评论