提交 f517e1a0 作者: Olivier Delalleau

Merged

...@@ -2188,7 +2188,7 @@ CudaNdarray_Dot(PyObject* _unused, PyObject* args) ...@@ -2188,7 +2188,7 @@ CudaNdarray_Dot(PyObject* _unused, PyObject* args)
} }
static PyObject * static PyObject *
filter(PyObject* __unsed_self, PyObject *args) // args = (data, broadcastable, strict) filter(PyObject* __unsed_self, PyObject *args) // args = (data, broadcastable, strict, storage)
{ {
/* /*
* TODO: DOC what this function should do in the various cases of * TODO: DOC what this function should do in the various cases of
...@@ -2282,10 +2282,10 @@ filter(PyObject* __unsed_self, PyObject *args) // args = (data, broadcastable, s ...@@ -2282,10 +2282,10 @@ filter(PyObject* __unsed_self, PyObject *args) // args = (data, broadcastable, s
Py_DECREF(rval); Py_DECREF(rval);
rval = NULL; rval = NULL;
} }
}
Py_DECREF(data); Py_DECREF(data);
Py_DECREF(py_data); Py_DECREF(py_data);
Py_DECREF(broadcastable); Py_DECREF(broadcastable);
}
return (PyObject*)rval; return (PyObject*)rval;
} }
} }
...@@ -2490,6 +2490,11 @@ CudaNdarray_new_nd(int nd) ...@@ -2490,6 +2490,11 @@ CudaNdarray_new_nd(int nd)
return (PyObject *) rval; return (PyObject *) rval;
} }
/**
* Initialize 'self' as a view of 'base', with memory storage 'data'
*/
int CudaNdarray_set_device_data(CudaNdarray * self, float * data, PyObject * base) int CudaNdarray_set_device_data(CudaNdarray * self, float * data, PyObject * base)
{ {
if (self->data_allocated) if (self->data_allocated)
......
...@@ -95,6 +95,12 @@ CudaNdarray_Check(const PyObject * ob); ...@@ -95,6 +95,12 @@ CudaNdarray_Check(const PyObject * ob);
int int
CudaNdarray_CheckExact(const PyObject * ob); CudaNdarray_CheckExact(const PyObject * ob);
/**
* Return true for a C-contiguous CudaNdarray, else false
*/
bool
CudaNdarray_is_c_contiguous(const CudaNdarray * self);
/**** /****
* Returns the number of elements necessary in host_structure and dev_structure for a given number of dimensions. * Returns the number of elements necessary in host_structure and dev_structure for a given number of dimensions.
*/ */
...@@ -386,13 +392,25 @@ int CudaNdarray_alloc_contiguous(CudaNdarray *self, const int nd, const inttype ...@@ -386,13 +392,25 @@ int CudaNdarray_alloc_contiguous(CudaNdarray *self, const int nd, const inttype
size = size * dim[i]; size = size * dim[i];
} }
if (self->data_allocated != size) if (CudaNdarray_is_c_contiguous(self) && (self->data_allocated == size))
{ {
if (device_free(self->devdata)) return 0;
}
// The structure of self will be reused with newly allocated memory.
// If self was a view, we should remove the reference to its base.
// (If base was already NULL, the following has no effect.)
Py_XDECREF(self->base);
self->base = NULL;
// If self is a view, do not try to free its memory
if (self->data_allocated && device_free(self->devdata))
{ {
// Does this ever happen?? Do we need to set data_allocated or devdata to 0? self->devdata = NULL;
self->data_allocated = 0;
return -1; return -1;
} }
assert(size>0); assert(size>0);
self->devdata = (float*)device_malloc(size*sizeof(real)); self->devdata = (float*)device_malloc(size*sizeof(real));
if (!self->devdata) if (!self->devdata)
...@@ -408,7 +426,7 @@ int CudaNdarray_alloc_contiguous(CudaNdarray *self, const int nd, const inttype ...@@ -408,7 +426,7 @@ int CudaNdarray_alloc_contiguous(CudaNdarray *self, const int nd, const inttype
self->devdata, self->devdata,
self); self);
self->data_allocated = size; self->data_allocated = size;
}
return 0; return 0;
} }
......
...@@ -6,7 +6,6 @@ ...@@ -6,7 +6,6 @@
import logging import logging
_logger = logging.getLogger('theano.tensor.opt') _logger = logging.getLogger('theano.tensor.opt')
import copy
import operator import operator
import itertools import itertools
import sys import sys
...@@ -574,14 +573,6 @@ class ShapeFeature(object): ...@@ -574,14 +573,6 @@ class ShapeFeature(object):
if hasattr(r.type,"broadcastable") and r.type.broadcastable[i]: if hasattr(r.type,"broadcastable") and r.type.broadcastable[i]:
return self.lscalar_one return self.lscalar_one
# NOTE: This may cause problems because the shape is not asserted
# there is an equivalent mechanism to do this, namely
# specify_shape that one should use
# If user provided size
#elif ( hasattr(r.tag,'shape') and
# r.tag.shape is not None and
# r.tag.shape[i] is not None):
# return T.constant(copy.copy(r.tag.shape[i]),dtype='int64')
else: else:
return Shape_i(i).make_node(r).outputs[0] return Shape_i(i).make_node(r).outputs[0]
...@@ -1101,7 +1092,6 @@ def local_alloc_elemwise(node): ...@@ -1101,7 +1092,6 @@ def local_alloc_elemwise(node):
return [node.op(*new)] return [node.op(*new)]
#TODO, global optimizer that lift the assert to the beginning of the graph. #TODO, global optimizer that lift the assert to the beginning of the graph.
#TODO, var.tag.shape to propagate the shape and lower the overhead of this op
#TODO, when all inputs can be optimized do all except one #TODO, when all inputs can be optimized do all except one
theano.configparser.AddConfigVar('experimental.local_alloc_elemwise', theano.configparser.AddConfigVar('experimental.local_alloc_elemwise',
...@@ -2749,13 +2739,7 @@ register_specialize(local_mul_specialize) ...@@ -2749,13 +2739,7 @@ register_specialize(local_mul_specialize)
@gof.local_optimizer([T.add]) @gof.local_optimizer([T.add])
def local_add_specialize(node): def local_add_specialize(node):
def fill_chain(v): def fill_chain(v):
# Not sure why this happens .. but I did not have the time to look
# into it, it probably has something to do with the dtype I'm
# providing the tag.shape of my variable
out = _fill_chain(v, node.inputs) out = _fill_chain(v, node.inputs)
if out[0].dtype != node.outputs[0].dtype:
return [T.cast(out[0], dtype = node.outputs[0].dtype)]
else:
return out return out
#here, we are past the point of canonicalization, so we don't want to put in un-necessary fills. #here, we are past the point of canonicalization, so we don't want to put in un-necessary fills.
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论