提交 6736be29 authored 作者: Olivier Delalleau's avatar Olivier Delalleau

Merged

...@@ -131,7 +131,7 @@ optdb.register('merge1', gof.MergeOptimizer(), ...@@ -131,7 +131,7 @@ optdb.register('merge1', gof.MergeOptimizer(),
0, 'fast_run', 'fast_compile') 0, 'fast_run', 'fast_compile')
optdb.register('canonicalize', gof.EquilibriumDB(), # rearranges elemwise expressions optdb.register('canonicalize', gof.EquilibriumDB(), # rearranges elemwise expressions
1, 'fast_run') 1, 'fast_run')
optdb.register('merge1.2', gof.MergeOptimizer(skip_const_merge=True), optdb.register('merge1.2', gof.MergeOptimizer(skip_const_merge=False),
1.2, 'fast_run', 'fast_compile') 1.2, 'fast_run', 'fast_compile')
optdb.register('stabilize', gof.EquilibriumDB(), # replace unstable subgraphs optdb.register('stabilize', gof.EquilibriumDB(), # replace unstable subgraphs
1.5, 'fast_run') 1.5, 'fast_run')
......
...@@ -956,21 +956,26 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key) ...@@ -956,21 +956,26 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key)
CudaNdarray * self = (CudaNdarray*) py_self; CudaNdarray * self = (CudaNdarray*) py_self;
PyObject * py_rval = NULL; PyObject * py_rval = NULL;
CudaNdarray * rval = NULL; CudaNdarray * rval = NULL;
PyObject * intobj = NULL;
//PyObject_Print(key, stderr, 0);
if (key == Py_Ellipsis) if (key == Py_Ellipsis)
{ {
Py_INCREF(py_self); Py_INCREF(py_self);
return py_self; return py_self;
} }
else if (PyInt_Check(key)) //INDEXING BY INTEGER if ((intobj=PyNumber_Int(key))) //INDEXING BY INTEGER
//else if (PyInt_Check(key)) //INDEXING BY INTEGER
{ {
int d_idx = PyInt_AsLong(intobj);
Py_DECREF(intobj); intobj=NULL;
//int d_idx = PyInt_AsLong(key);
if (self->nd == 0) if (self->nd == 0)
{ {
PyErr_SetString(PyExc_NotImplementedError, "index into 0-d array"); PyErr_SetString(PyExc_NotImplementedError, "index into 0-d array");
return NULL; return NULL;
} }
int d_idx = PyInt_AsLong(key);
int d_dim = CudaNdarray_HOST_DIMS(self)[0]; int d_dim = CudaNdarray_HOST_DIMS(self)[0];
int offset = 0; int offset = 0;
...@@ -1009,7 +1014,11 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key) ...@@ -1009,7 +1014,11 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key)
CudaNdarray_set_dim(rval, d-1, CudaNdarray_HOST_DIMS(self)[d]); CudaNdarray_set_dim(rval, d-1, CudaNdarray_HOST_DIMS(self)[d]);
} }
} }
else if (PySlice_Check(key)) //INDEXING BY SLICE else
{
PyErr_Clear();
}
if (PySlice_Check(key)) //INDEXING BY SLICE
{ {
if (self->nd == 0) if (self->nd == 0)
{ {
...@@ -1057,7 +1066,7 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key) ...@@ -1057,7 +1066,7 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key)
CudaNdarray_set_dim(rval, d, CudaNdarray_HOST_DIMS(self)[d]); CudaNdarray_set_dim(rval, d, CudaNdarray_HOST_DIMS(self)[d]);
} }
} }
else if (PyTuple_Check(key)) //INDEXING BY TUPLE if (PyTuple_Check(key)) //INDEXING BY TUPLE
{ {
//elements of the tuple can be either integers or slices //elements of the tuple can be either integers or slices
//the dimensionality of the view we will return is diminished for each slice in the tuple //the dimensionality of the view we will return is diminished for each slice in the tuple
...@@ -1127,9 +1136,11 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key) ...@@ -1127,9 +1136,11 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key)
} }
++rval_d; ++rval_d;
} }
else if (PyInt_Check(key_d)) else if ((intobj=PyNumber_Int(key_d)))
{ {
int d_idx = PyInt_AsLong(key_d); int d_idx = PyInt_AsLong(intobj);
Py_DECREF(intobj);
intobj = NULL;
int d_dim = CudaNdarray_HOST_DIMS(self)[d]; int d_dim = CudaNdarray_HOST_DIMS(self)[d];
if ((d_idx >= 0) && (d_idx < d_dim)) if ((d_idx >= 0) && (d_idx < d_dim))
...@@ -1151,6 +1162,7 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key) ...@@ -1151,6 +1162,7 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key)
} }
else else
{ {
PyErr_Clear(); // clear the error set by PyNumber_Int
PyErr_SetString(PyExc_IndexError, "index must be either int or slice"); PyErr_SetString(PyExc_IndexError, "index must be either int or slice");
Py_DECREF(rval); Py_DECREF(rval);
return NULL; return NULL;
...@@ -1158,16 +1170,16 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key) ...@@ -1158,16 +1170,16 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key)
} }
} }
} }
else
{
PyErr_SetString(PyExc_NotImplementedError, "Unknown key type");
return NULL;
}
if (py_rval) if (py_rval)
{ {
if (verbose) fprint_CudaNdarray(stderr, self); if (verbose) fprint_CudaNdarray(stderr, self);
if (verbose) fprint_CudaNdarray(stderr, rval); if (verbose) fprint_CudaNdarray(stderr, rval);
} }
else
{
PyErr_SetString(PyExc_NotImplementedError, "Unknown key type");
return NULL;
}
return py_rval; return py_rval;
} }
...@@ -1776,6 +1788,10 @@ int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, CudaNdarray * other) ...@@ -1776,6 +1788,10 @@ int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, CudaNdarray * other)
} }
size *= (unsigned int) CudaNdarray_HOST_DIMS(self)[i]; size *= (unsigned int) CudaNdarray_HOST_DIMS(self)[i];
} }
if (0 == size)
{
return 0; //nothing to copy, we're done.
}
if (CudaNdarray_is_c_contiguous(self) && CudaNdarray_is_c_contiguous(other)) if (CudaNdarray_is_c_contiguous(self) && CudaNdarray_is_c_contiguous(other))
{ {
cublasScopy(size, CudaNdarray_DEV_DATA(other), 1, CudaNdarray_DEV_DATA(self), 1); cublasScopy(size, CudaNdarray_DEV_DATA(other), 1, CudaNdarray_DEV_DATA(self), 1);
......
差异被折叠。
...@@ -1257,7 +1257,7 @@ class Prepend_scalar_constant_to_each_row(gof.Op): ...@@ -1257,7 +1257,7 @@ class Prepend_scalar_constant_to_each_row(gof.Op):
def __eq__(self, other): def __eq__(self, other):
return (type(self) == type(other)) and (self.val == other.val) return (type(self) == type(other)) and (self.val == other.val)
def __hash__(self): def __hash__(self):
return tensor.hashtype(self) ^ hash(self.val.value) return tensor.hashtype(self) ^ hash(self.val.data)
def __str__(self): def __str__(self):
return '%s{%s}'%(self.__class__.__name__,self.val) return '%s{%s}'%(self.__class__.__name__,self.val)
......
...@@ -610,6 +610,43 @@ def local_alloc_unary(node): ...@@ -610,6 +610,43 @@ def local_alloc_unary(node):
return [T.alloc(T.cast(v, node.outputs[0].dtype), *shp)] return [T.alloc(T.cast(v, node.outputs[0].dtype), *shp)]
############################
# Constant Canonicalization
############################
@register_canonicalize
@gof.local_optimizer([])
def local_upcast_elemwise_constant_inputs(node):
    """Explicitly upcast constant inputs of an Elemwise op to its output dtype.

    This only applies to Elemwise ops whose scalar op has an
    ``output_types_preference`` of ``upgrade_to_float`` or ``upcast_out``,
    i.e. ops that do implicit upcasting anyway.

    Rationale: it helps merge things like (1-x) and (1.0 - x).

    Returns a one-element list holding the output of the op re-applied to the
    new inputs when the new input list differs from ``node.inputs``; otherwise
    returns None, leaving the node untouched.
    """
    if not isinstance(node.op, T.Elemwise):
        return None

    scalar_op = node.op.scalar_op
    if scalar_op.output_types_preference not in (T.scal.upgrade_to_float,
                                                 T.scal.upcast_out):
        return None

    # This is the kind of op whose input dtypes we can safely change by
    # upcasting explicitly.
    output_dtype = node.outputs[0].type.dtype
    new_inputs = []
    for i in node.inputs:
        if i.type.dtype == output_dtype:
            new_inputs.append(i)
            continue
        try:
            # Original author's note: works only for scalars, presumably.
            cval_i = get_constant_value(i)
            new_inputs.append(T.cast(cval_i, output_dtype))
        except Exception:
            # `Exception` rather than a bare `except`, so KeyboardInterrupt
            # and SystemExit are no longer swallowed here.
            if isinstance(i, T.TensorConstant):
                # For the case of a non-scalar constant.
                new_inputs.append(T.cast(i, output_dtype))
            else:
                new_inputs.append(i)

    if new_inputs != node.inputs:
        return [node.op(*new_inputs)]
    return None
################## ##################
# Subtensor opts # # Subtensor opts #
################## ##################
...@@ -1717,6 +1754,7 @@ def local_greedy_distributor(node): ...@@ -1717,6 +1754,7 @@ def local_greedy_distributor(node):
return [rval] return [rval]
register_canonicalize(local_greedy_distributor) register_canonicalize(local_greedy_distributor)
register_stabilize(local_greedy_distributor)
...@@ -1748,6 +1786,7 @@ def constant_folding(node): ...@@ -1748,6 +1786,7 @@ def constant_folding(node):
return msg return msg
register_canonicalize(constant_folding) register_canonicalize(constant_folding)
register_stabilize(constant_folding) # because
register_specialize(constant_folding) register_specialize(constant_folding)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论