Commit 9950ce08 authored by Pascal Lamblin

Merge pull request #1580 from nouiz/deprecated

[MRG]Deprecated
global-include *.txt
global-include *.c
global-include *.cu
global-include *.cuh
global-include *.sh
......
......@@ -67,9 +67,9 @@ you should check the strides and alignment.
if (!%(y)s)
%(fail)s;
{//New scope needed to make compilation work
dtype_%(y)s * y = (dtype_%(y)s*)%(y)s->data;
dtype_%(x)s * x = (dtype_%(x)s*)%(x)s->data;
for (int i = 2; i < %(x)s->dimensions[0]; ++i)
dtype_%(y)s * y = (dtype_%(y)s*)PyArray_DATA(%(y)s);
dtype_%(x)s * x = (dtype_%(x)s*)PyArray_DATA(%(x)s);
for (int i = 2; i < PyArray_DIMS(%(x)s)[0]; ++i)
y[i] = y[i-1]*y[i-2] + x[i];
}
""" % locals()
......
......@@ -420,7 +420,9 @@ TensorVariable
.. class:: _tensor_py_operators(object)
This mix-in class adds convenient attributes, methods, and support for Python operators (see :ref:`tensor_operator_support`).
This mix-in class adds convenient attributes, methods, and support
to TensorVariable, TensorConstant and TensorSharedVariable for
Python operators (see :ref:`tensor_operator_support`).
.. attribute:: type
......@@ -472,6 +474,10 @@ TensorVariable
See :func:`flatten`.
.. method:: ravel()
return self.flatten(). For NumPy compatibility.
.. attribute:: T
Transpose of this tensor.
......@@ -485,8 +491,31 @@ TensorVariable
same vector! Use `reshape` or `dimshuffle` to turn your vector
into a row or column matrix.
.. method:: {any,all}(axis=None, keepdims=False)
.. method:: {sum,prod,mean}(axis=None, dtype=None, keepdims=False, acc_dtype=None)
.. method:: {var,std,min,max,argmin,argmax}(axis=None, keepdims=False)
.. method:: diagonal(offset=0, axis1=0, axis2=1)
.. method:: astype(dtype)
.. method:: take(indices, axis=None, mode='raise')
.. method:: copy()
.. method:: norm(L, axis=None)
.. method:: nonzero(self, return_matrix=False)
.. method:: nonzero_values(self)
.. method:: sort(self, axis=-1, kind='quicksort', order=None)
.. method:: argsort(self, axis=-1, kind='quicksort', order=None)
.. method:: clip(self, a_min, a_max)
.. method:: conj()
.. method:: repeat(repeats, axis=None)
.. method:: round(mode="half_away_from_zero")
.. method:: trace()
.. method:: get_scalar_constant_value()
.. method:: zeros_like(model, dtype=None)
All of the above methods behave like their NumPy equivalents, applied to the current tensor.
.. method:: __{abs,neg,lt,le,gt,ge,invert,and,or,add,sub,mul,div,truediv,floordiv}__
These elementwise operations are supported via Python syntax.
Shaping and Shuffling
=====================
......
......@@ -155,11 +155,11 @@ class WeirdBrokenOp(gof.Op):
prep_vars = """
//the output array has size M x N
npy_intp M = PyArray_DIMS(%(a)s)[0];
npy_intp Sa = %(a)s->strides[0] / PyArray_DESCR(%(a)s)->elsize;
npy_intp Sz = %(z)s->strides[0] / PyArray_DESCR(%(z)s)->elsize;
npy_intp Sa = PyArray_STRIDES(%(a)s)[0] / PyArray_DESCR(%(a)s)->elsize;
npy_intp Sz = PyArray_STRIDES(%(z)s)[0] / PyArray_DESCR(%(z)s)->elsize;
npy_double * Da = (npy_double*)%(a)s->data;
npy_double * Dz = (npy_double*)%(z)s->data;
npy_double * Da = (npy_double*)PyArray_BYTES(%(a)s);
npy_double * Dz = (npy_double*)PyArray_BYTES(%(z)s);
//clear the output array
for (npy_intp m = 0; m < M; ++m)
......
......@@ -1693,7 +1693,7 @@ class GCC_compiler(object):
#to use the new API, but not everywhere. When finished, enable
#the following macro to assert that we don't bring new code
#that use the old API.
#cxxflags.append("-D NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION")
cxxflags.append("-D NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION")
numpy_ver = [int(n) for n in numpy.__version__.split('.')[:2]]
# numpy 1.7 deprecated the following macro but the new one didn't
......
......@@ -76,10 +76,7 @@ except ImportError:
except ImportError:
_logger.info("Compiling new CVM")
dirname = 'lazylinker_ext'
    # We use a .txt extension because otherwise the file does not get
    # included when we create a package to send to PyPI.
    # This happens even if we ask to include *.c files.
cfile = os.path.join(theano.__path__[0], 'gof', 'lazylinker_c.c.txt')
cfile = os.path.join(theano.__path__[0], 'gof', 'lazylinker_c.c')
code = open(cfile).read()
loc = os.path.join(config.compiledir, dirname)
if not os.path.exists(loc):
......
......@@ -220,6 +220,7 @@ if __name__ == "__main__":
GTX 650 Ti 0.27s
GTX 460 0.37s 0.45s
GTX 285 0.42s 0.452s 0.452s 0.40s # cuda 3.0 seems faster? driver version?
750M 0.49s
GTX 550 Ti 0.57s
GT 520 2.68s 3.06s
520M 2.44s 3.19s # with bumblebee on Ubuntu 12.04
......
......@@ -2223,12 +2223,6 @@ class GpuReshape(tensor.Reshape, GpuOp):
out[0] = x.reshape(tuple(shp))
# C Code shared by GpuSubtensor and GpuIncSubtensor
_define_set_data = """
#define CudaNdarray_set_device_data2(obj, ptr, base) \
CudaNdarray_set_device_data(obj, (float *)ptr, base)
"""
class GpuSubtensor(GpuOp, tensor.Subtensor):
"""
Implement subtensor on the gpu.
......@@ -2276,16 +2270,27 @@ class GpuSubtensor(GpuOp, tensor.Subtensor):
view_ndim = node.outputs[0].ndim
fail = sub['fail']
decl = "CudaNdarray* xview = NULL;"
get_xview = self.helper_c_code(node, name, inputs, outputs, sub,
self.idx_list,
view_ndim=view_ndim,
c_prefix='CudaNdarray',
strides_mul=4,
)
build_view = """
//TODO: give this Op a second output so that this view can be cached
//TODO: alternatively, fix the memory leak on failure
CudaNdarray* xview = (CudaNdarray*) CudaNdarray_New(%(view_ndim)s);
xview = (CudaNdarray*) CudaNdarray_New(%(view_ndim)s);
if (!xview)
{
%(fail)s;
}
if (CudaNdarray_set_device_data(xview, CudaNdarray_DEV_DATA(%(x)s),
(PyObject*) NULL))
if (CudaNdarray_set_device_data(
xview,
CudaNdarray_DEV_DATA(%(x)s) + xview_offset/4,
(PyObject*) %(x)s))
{
PyErr_Format(PyExc_RuntimeError,
"GpuSubtensor is not able to set the"
......@@ -2294,43 +2299,24 @@ class GpuSubtensor(GpuOp, tensor.Subtensor):
%(fail)s;
}
cnda_mark_dev_structure_dirty(xview);
""" % locals()
get_xview = _define_set_data + \
self.helper_c_code(node, name, inputs, outputs, sub,
self.idx_list,
c_prefix='CudaNdarray',
set_data='CudaNdarray_set_device_data2',
set_dim='CudaNdarray_set_dim',
set_stride='CudaNdarray_set_stride',
update_flags="", strides_mul=4)
finish_view = ""
        #For broadcasted dimensions, set the strides to 0.
        #We can't do that only for broadcasted dimensions, as this can also
        #happen for dimensions of size 0 that are rebroadcast later.
for idx in range(node.outputs[0].ndim):
finish_view += """
if(CudaNdarray_HOST_DIMS(xview)[%(idx)s]==1)
CudaNdarray_set_stride(xview, %(idx)s, 0);
""" % locals()
finish_view += """
//Set the base only now
if(CudaNdarray_set_device_data(xview, CudaNdarray_DEV_DATA(xview),
%(x)s)){
PyErr_Format(PyExc_RuntimeError,
"GpuSubtensor is not able to set"
" the base of the view array");
Py_XDECREF(xview);
%(fail)s;
for(int idx=0;idx <%(view_ndim)s; idx++){
            //For broadcasted dimensions, set the strides to 0.
            //We can't do that only for broadcasted dimensions, as this can
            //also happen for dimensions of size 0 that are rebroadcast later.
if(xview_dims[idx]==1)
CudaNdarray_set_stride(xview, idx, 0);
else
CudaNdarray_set_stride(xview, idx, xview_strides[idx]);
CudaNdarray_set_dim(xview, idx, xview_dims[idx]);
}
""" % locals()
finish_view = """
Py_XDECREF(%(z)s);
%(z)s = xview;
""" % locals()
return build_view + "{" + get_xview + "}" + finish_view
return decl + get_xview + build_view + finish_view
def c_code_cache_version(self):
hv = self.helper_c_code_cache_version()
......@@ -2719,6 +2705,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1):
""" %locals()
class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
"""
Implement IncSubtensor on the gpu.
......@@ -2756,6 +2743,9 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
"""
return """(CudaNdarray*) CudaNdarray_Copy(%(x)s)""" % locals()
def decl_view(self):
return "CudaNdarray* zview = NULL;"
def make_view_array(self, x, view_ndim):
"""
:param x: a string identifying an array to be viewed
......@@ -2765,17 +2755,32 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
This doesn't need to actually set up the view with the
right indexing; we'll do that manually later.
"""
return """CudaNdarray* zview = (CudaNdarray*)
CudaNdarray_New(%(view_ndim)s)""" % locals()
ret = """zview = (CudaNdarray*) CudaNdarray_New(%(view_ndim)s);
if (CudaNdarray_set_device_data(
zview,
CudaNdarray_DEV_DATA(%(x)s) + xview_offset/4,
(PyObject*) %(x)s))
{
zview = NULL;
PyErr_Format(PyExc_RuntimeError,
"GpuSubtensor is not able to set the"
" devdata field of the view");
}else{
cnda_mark_dev_structure_dirty(zview);
for(int idx=0;idx <%(view_ndim)s; idx++){
if(xview_dims[idx]==1)
CudaNdarray_set_stride(zview, idx, 0);
else
CudaNdarray_set_stride(zview, idx, xview_strides[idx]);
CudaNdarray_set_dim(zview, idx, xview_dims[idx]);
}
}
""" % locals()
return ret
def get_helper_c_code_args(self):
""" Return a dictionary of arguments to use with helper_c_code"""
return { 'update_flags' : "",
'c_prefix' : 'CudaNdarray',
'set_data' :'CudaNdarray_set_device_data2',
'set_dim' : 'CudaNdarray_set_dim',
'set_stride' : 'CudaNdarray_set_stride',
'update_flags' : "",
return {'c_prefix': 'CudaNdarray',
'strides_mul': 4
}
......@@ -2789,24 +2794,6 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
"""
return """CudaNdarray_CopyFromCudaNdarray(%(view)s, %(source)s)""" % locals()
def define_set_data(self):
return _define_set_data
def link_view_array(self, x, fail):
return """
if (CudaNdarray_set_device_data(zview, CudaNdarray_DEV_DATA(%(x)s),
(PyObject*) NULL))
{
PyErr_Format(PyExc_RuntimeError,
"GpuSubtensor is not able to set the"
" devdata field of the view");
Py_XDECREF(zview);
%(fail)s;
}
cnda_mark_dev_structure_dirty(zview);
""" % locals()
def set_view_base(self, x, fail):
return """
//Set the base only now
......@@ -2823,9 +2810,8 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
def add_to_zview(self, x, fail):
return """
PyObject * add_result = CudaNdarray_inplace_add((PyObject *) zview,
(PyObject *) py_%(x)s);
PyObject * add_result = CudaNdarray_inplace_add((PyObject *) zview,
(PyObject *) py_%(x)s);
if (! add_result )
{
......@@ -2839,7 +2825,6 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
""" % locals()
def c_code_cache_version(self):
parent_version = super(GpuIncSubtensor, self).c_code_cache_version()
if parent_version:
return parent_version + (0,)
......
Diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
Diff is collapsed.
Diff is collapsed.
Diff is collapsed.
Diff is collapsed.
Diff is collapsed.
Diff is collapsed.
Diff is collapsed.
Markdown format
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment.