提交 9950ce08 authored 作者: Pascal Lamblin's avatar Pascal Lamblin

Merge pull request #1580 from nouiz/deprecated

[MRG]Deprecated
global-include *.txt global-include *.txt
global-include *.c
global-include *.cu global-include *.cu
global-include *.cuh global-include *.cuh
global-include *.sh global-include *.sh
......
...@@ -67,9 +67,9 @@ you should check the strides and alignment. ...@@ -67,9 +67,9 @@ you should check the strides and alignment.
if (!%(y)s) if (!%(y)s)
%(fail)s; %(fail)s;
{//New scope needed to make compilation work {//New scope needed to make compilation work
dtype_%(y)s * y = (dtype_%(y)s*)%(y)s->data; dtype_%(y)s * y = (dtype_%(y)s*)PyArray_DATA(%(y)s);
dtype_%(x)s * x = (dtype_%(x)s*)%(x)s->data; dtype_%(x)s * x = (dtype_%(x)s*)PyArray_DATA(%(x)s);
for (int i = 2; i < %(x)s->dimensions[0]; ++i) for (int i = 2; i < PyArray_DIMS(%(x)s)[0]; ++i)
y[i] = y[i-1]*y[i-2] + x[i]; y[i] = y[i-1]*y[i-2] + x[i];
} }
""" % locals() """ % locals()
......
...@@ -420,7 +420,9 @@ TensorVariable ...@@ -420,7 +420,9 @@ TensorVariable
.. class:: _tensor_py_operators(object) .. class:: _tensor_py_operators(object)
This mix-in class adds convenient attributes, methods, and support for Python operators (see :ref:`tensor_operator_support`). This mix-in class adds convenient attributes, methods, and support
to TensorVariable, TensorConstant and TensorSharedVariable for
Python operators (see :ref:`tensor_operator_support`).
.. attribute:: type .. attribute:: type
...@@ -472,6 +474,10 @@ TensorVariable ...@@ -472,6 +474,10 @@ TensorVariable
See :func:`flatten`. See :func:`flatten`.
.. method:: ravel()
Return self.flatten(). Provided for NumPy compatibility.
.. attribute:: T .. attribute:: T
Transpose of this tensor. Transpose of this tensor.
...@@ -485,8 +491,31 @@ TensorVariable ...@@ -485,8 +491,31 @@ TensorVariable
same vector! Use `reshape` or `dimshuffle` to turn your vector same vector! Use `reshape` or `dimshuffle` to turn your vector
into a row or column matrix. into a row or column matrix.
.. method:: {any,all}(axis=None, keepdims=False)
.. method:: {sum,prod,mean}(axis=None, dtype=None, keepdims=False, acc_dtype=None)
.. method:: {var,std,min,max,argmin,argmax}(axis=None, keepdims=False)
.. method:: diagonal(offset=0, axis1=0, axis2=1)
.. method:: astype(dtype)
.. method:: take(indices, axis=None, mode='raise')
.. method:: copy()
.. method:: norm(L, axis=None)
.. method:: nonzero(return_matrix=False)
.. method:: nonzero_values()
.. method:: sort(axis=-1, kind='quicksort', order=None)
.. method:: argsort(axis=-1, kind='quicksort', order=None)
.. method:: clip(a_min, a_max)
.. method:: conj()
.. method:: repeat(repeats, axis=None)
.. method:: round(mode="half_away_from_zero")
.. method:: trace()
.. method:: get_scalar_constant_value()
.. method:: zeros_like(model, dtype=None)
All of the above methods are the Theano equivalents of the NumPy methods of the same name, applied to the current tensor.
.. method:: __{abs,neg,lt,le,gt,ge,invert,and,or,add,sub,mul,div,truediv,floordiv}__
These elementwise operations are supported via the usual Python operator syntax.
Shaping and Shuffling Shaping and Shuffling
===================== =====================
......
...@@ -155,11 +155,11 @@ class WeirdBrokenOp(gof.Op): ...@@ -155,11 +155,11 @@ class WeirdBrokenOp(gof.Op):
prep_vars = """ prep_vars = """
//the output array has size M x N //the output array has size M x N
npy_intp M = PyArray_DIMS(%(a)s)[0]; npy_intp M = PyArray_DIMS(%(a)s)[0];
npy_intp Sa = %(a)s->strides[0] / PyArray_DESCR(%(a)s)->elsize; npy_intp Sa = PyArray_STRIDES(%(a)s)[0] / PyArray_DESCR(%(a)s)->elsize;
npy_intp Sz = %(z)s->strides[0] / PyArray_DESCR(%(z)s)->elsize; npy_intp Sz = PyArray_STRIDES(%(z)s)[0] / PyArray_DESCR(%(z)s)->elsize;
npy_double * Da = (npy_double*)%(a)s->data; npy_double * Da = (npy_double*)PyArray_BYTES(%(a)s);
npy_double * Dz = (npy_double*)%(z)s->data; npy_double * Dz = (npy_double*)PyArray_BYTES(%(z)s);
//clear the output array //clear the output array
for (npy_intp m = 0; m < M; ++m) for (npy_intp m = 0; m < M; ++m)
......
...@@ -1693,7 +1693,7 @@ class GCC_compiler(object): ...@@ -1693,7 +1693,7 @@ class GCC_compiler(object):
#to use the new API, but not everywhere. When finished, enable #to use the new API, but not everywhere. When finished, enable
#the following macro to assert that we don't bring new code #the following macro to assert that we don't bring new code
#that use the old API. #that use the old API.
#cxxflags.append("-D NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION") cxxflags.append("-D NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION")
numpy_ver = [int(n) for n in numpy.__version__.split('.')[:2]] numpy_ver = [int(n) for n in numpy.__version__.split('.')[:2]]
# numpy 1.7 deprecated the following macro but the new one didn't # numpy 1.7 deprecated the following macro but the new one didn't
......
...@@ -76,10 +76,7 @@ except ImportError: ...@@ -76,10 +76,7 @@ except ImportError:
except ImportError: except ImportError:
_logger.info("Compiling new CVM") _logger.info("Compiling new CVM")
dirname = 'lazylinker_ext' dirname = 'lazylinker_ext'
# We use a .txt extensions as otherwise it don't get cfile = os.path.join(theano.__path__[0], 'gof', 'lazylinker_c.c')
# included when we create a package to send to pypi
# This happen even if we tell to include *.c files
cfile = os.path.join(theano.__path__[0], 'gof', 'lazylinker_c.c.txt')
code = open(cfile).read() code = open(cfile).read()
loc = os.path.join(config.compiledir, dirname) loc = os.path.join(config.compiledir, dirname)
if not os.path.exists(loc): if not os.path.exists(loc):
......
...@@ -220,6 +220,7 @@ if __name__ == "__main__": ...@@ -220,6 +220,7 @@ if __name__ == "__main__":
GTX 650 Ti 0.27s GTX 650 Ti 0.27s
GTX 460 0.37s 0.45s GTX 460 0.37s 0.45s
GTX 285 0.42s 0.452s 0.452s 0.40s # cuda 3.0 seems faster? driver version? GTX 285 0.42s 0.452s 0.452s 0.40s # cuda 3.0 seems faster? driver version?
750M 0.49s
GTX 550 Ti 0.57s GTX 550 Ti 0.57s
GT 520 2.68s 3.06s GT 520 2.68s 3.06s
520M 2.44s 3.19s # with bumblebee on Ubuntu 12.04 520M 2.44s 3.19s # with bumblebee on Ubuntu 12.04
......
...@@ -2223,12 +2223,6 @@ class GpuReshape(tensor.Reshape, GpuOp): ...@@ -2223,12 +2223,6 @@ class GpuReshape(tensor.Reshape, GpuOp):
out[0] = x.reshape(tuple(shp)) out[0] = x.reshape(tuple(shp))
# C Code shared by GpuSubtensor and GpuIncSubtensor
_define_set_data = """
#define CudaNdarray_set_device_data2(obj, ptr, base) \
CudaNdarray_set_device_data(obj, (float *)ptr, base)
"""
class GpuSubtensor(GpuOp, tensor.Subtensor): class GpuSubtensor(GpuOp, tensor.Subtensor):
""" """
Implement subtensor on the gpu. Implement subtensor on the gpu.
...@@ -2276,16 +2270,27 @@ class GpuSubtensor(GpuOp, tensor.Subtensor): ...@@ -2276,16 +2270,27 @@ class GpuSubtensor(GpuOp, tensor.Subtensor):
view_ndim = node.outputs[0].ndim view_ndim = node.outputs[0].ndim
fail = sub['fail'] fail = sub['fail']
decl = "CudaNdarray* xview = NULL;"
get_xview = self.helper_c_code(node, name, inputs, outputs, sub,
self.idx_list,
view_ndim=view_ndim,
c_prefix='CudaNdarray',
strides_mul=4,
)
build_view = """ build_view = """
//TODO: give this Op a second output so that this view can be cached //TODO: give this Op a second output so that this view can be cached
//TODO: alternatively, fix the memory leak on failure //TODO: alternatively, fix the memory leak on failure
CudaNdarray* xview = (CudaNdarray*) CudaNdarray_New(%(view_ndim)s); xview = (CudaNdarray*) CudaNdarray_New(%(view_ndim)s);
if (!xview) if (!xview)
{ {
%(fail)s; %(fail)s;
} }
if (CudaNdarray_set_device_data(xview, CudaNdarray_DEV_DATA(%(x)s),
(PyObject*) NULL)) if (CudaNdarray_set_device_data(
xview,
CudaNdarray_DEV_DATA(%(x)s) + xview_offset/4,
(PyObject*) %(x)s))
{ {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"GpuSubtensor is not able to set the" "GpuSubtensor is not able to set the"
...@@ -2294,43 +2299,24 @@ class GpuSubtensor(GpuOp, tensor.Subtensor): ...@@ -2294,43 +2299,24 @@ class GpuSubtensor(GpuOp, tensor.Subtensor):
%(fail)s; %(fail)s;
} }
cnda_mark_dev_structure_dirty(xview); cnda_mark_dev_structure_dirty(xview);
""" % locals() for(int idx=0;idx <%(view_ndim)s; idx++){
//For broadcasted dimensions, set the strides to 0
get_xview = _define_set_data + \ //We can't do that only for broadcasted dimensions as this can happen
self.helper_c_code(node, name, inputs, outputs, sub, //for dimensions of size 0. That are rebroadcated later.
self.idx_list, if(xview_dims[idx]==1)
c_prefix='CudaNdarray', CudaNdarray_set_stride(xview, idx, 0);
set_data='CudaNdarray_set_device_data2', else
set_dim='CudaNdarray_set_dim', CudaNdarray_set_stride(xview, idx, xview_strides[idx]);
set_stride='CudaNdarray_set_stride', CudaNdarray_set_dim(xview, idx, xview_dims[idx]);
update_flags="", strides_mul=4)
finish_view = ""
#For broadcasted dimensions, set the strides to 0
#We can't do that only for broadcasted dimensions as this can happen for dimensions of size 0,
#That are rebroadcated later.
for idx in range(node.outputs[0].ndim):
finish_view += """
if(CudaNdarray_HOST_DIMS(xview)[%(idx)s]==1)
CudaNdarray_set_stride(xview, %(idx)s, 0);
""" % locals()
finish_view += """
//Set the base only now
if(CudaNdarray_set_device_data(xview, CudaNdarray_DEV_DATA(xview),
%(x)s)){
PyErr_Format(PyExc_RuntimeError,
"GpuSubtensor is not able to set"
" the base of the view array");
Py_XDECREF(xview);
%(fail)s;
} }
""" % locals()
finish_view = """
Py_XDECREF(%(z)s); Py_XDECREF(%(z)s);
%(z)s = xview; %(z)s = xview;
""" % locals() """ % locals()
return build_view + "{" + get_xview + "}" + finish_view return decl + get_xview + build_view + finish_view
def c_code_cache_version(self): def c_code_cache_version(self):
hv = self.helper_c_code_cache_version() hv = self.helper_c_code_cache_version()
...@@ -2719,6 +2705,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1): ...@@ -2719,6 +2705,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1):
""" %locals() """ %locals()
class GpuIncSubtensor(tensor.IncSubtensor, GpuOp): class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
""" """
Implement IncSubtensor on the gpu. Implement IncSubtensor on the gpu.
...@@ -2756,6 +2743,9 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp): ...@@ -2756,6 +2743,9 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
""" """
return """(CudaNdarray*) CudaNdarray_Copy(%(x)s)""" % locals() return """(CudaNdarray*) CudaNdarray_Copy(%(x)s)""" % locals()
def decl_view(self):
    """Return the C statement declaring the view pointer.

    The returned snippet declares ``zview`` as a ``CudaNdarray*`` and
    initialises it to ``NULL``.
    """
    declaration = "CudaNdarray* zview = NULL;"
    return declaration
def make_view_array(self, x, view_ndim): def make_view_array(self, x, view_ndim):
""" """
:param x: a string identifying an array to be viewed :param x: a string identifying an array to be viewed
...@@ -2765,17 +2755,32 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp): ...@@ -2765,17 +2755,32 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
This doesn't need to actually set up the view with the This doesn't need to actually set up the view with the
right indexing; we'll do that manually later. right indexing; we'll do that manually later.
""" """
return """CudaNdarray* zview = (CudaNdarray*) ret = """zview = (CudaNdarray*) CudaNdarray_New(%(view_ndim)s);
CudaNdarray_New(%(view_ndim)s)""" % locals() if (CudaNdarray_set_device_data(
zview,
CudaNdarray_DEV_DATA(%(x)s) + xview_offset/4,
(PyObject*) %(x)s))
{
zview = NULL;
PyErr_Format(PyExc_RuntimeError,
"GpuSubtensor is not able to set the"
" devdata field of the view");
}else{
cnda_mark_dev_structure_dirty(zview);
for(int idx=0;idx <%(view_ndim)s; idx++){
if(xview_dims[idx]==1)
CudaNdarray_set_stride(zview, idx, 0);
else
CudaNdarray_set_stride(zview, idx, xview_strides[idx]);
CudaNdarray_set_dim(zview, idx, xview_dims[idx]);
}
}
""" % locals()
return ret
def get_helper_c_code_args(self): def get_helper_c_code_args(self):
""" Return a dictionary of arguments to use with helper_c_code""" """ Return a dictionary of arguments to use with helper_c_code"""
return { 'update_flags' : "", return {'c_prefix': 'CudaNdarray',
'c_prefix' : 'CudaNdarray',
'set_data' :'CudaNdarray_set_device_data2',
'set_dim' : 'CudaNdarray_set_dim',
'set_stride' : 'CudaNdarray_set_stride',
'update_flags' : "",
'strides_mul': 4 'strides_mul': 4
} }
...@@ -2789,24 +2794,6 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp): ...@@ -2789,24 +2794,6 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
""" """
return """CudaNdarray_CopyFromCudaNdarray(%(view)s, %(source)s)""" % locals() return """CudaNdarray_CopyFromCudaNdarray(%(view)s, %(source)s)""" % locals()
def define_set_data(self):
return _define_set_data
def link_view_array(self, x, fail):
return """
if (CudaNdarray_set_device_data(zview, CudaNdarray_DEV_DATA(%(x)s),
(PyObject*) NULL))
{
PyErr_Format(PyExc_RuntimeError,
"GpuSubtensor is not able to set the"
" devdata field of the view");
Py_XDECREF(zview);
%(fail)s;
}
cnda_mark_dev_structure_dirty(zview);
""" % locals()
def set_view_base(self, x, fail): def set_view_base(self, x, fail):
return """ return """
//Set the base only now //Set the base only now
...@@ -2823,9 +2810,8 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp): ...@@ -2823,9 +2810,8 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
def add_to_zview(self, x, fail): def add_to_zview(self, x, fail):
return """ return """
PyObject * add_result = CudaNdarray_inplace_add((PyObject *) zview,
PyObject * add_result = CudaNdarray_inplace_add((PyObject *) zview, (PyObject *) py_%(x)s);
(PyObject *) py_%(x)s);
if (! add_result ) if (! add_result )
{ {
...@@ -2839,7 +2825,6 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp): ...@@ -2839,7 +2825,6 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
""" % locals() """ % locals()
def c_code_cache_version(self): def c_code_cache_version(self):
parent_version = super(GpuIncSubtensor, self).c_code_cache_version() parent_version = super(GpuIncSubtensor, self).c_code_cache_version()
if parent_version: if parent_version:
return parent_version + (0,) return parent_version + (0,)
......
...@@ -5,13 +5,14 @@ Generator code in SSJ package (L'Ecuyer & Simard) ...@@ -5,13 +5,14 @@ Generator code in SSJ package (L'Ecuyer & Simard)
http://www.iro.umontreal.ca/~simardr/ssj/indexe.html http://www.iro.umontreal.ca/~simardr/ssj/indexe.html
""" """
import sys, warnings import warnings
import numpy import numpy
from theano import Op, Apply, shared, config, Variable from theano import Op, Apply, shared, config, Variable
from theano.tensor import (raw_random, TensorType, as_tensor_variable, from theano.tensor import (raw_random, TensorType, as_tensor_variable,
get_vector_length, cast, opt, scal) get_vector_length, cast, opt, scal)
from theano.tensor import zeros_like, sqrt, log, sin, cos, join, prod from theano.tensor import sqrt, log, sin, cos, join, prod
from theano.compile import optdb from theano.compile import optdb
from theano.gof import local_optimizer from theano.gof import local_optimizer
from theano.gof.python25 import all, any from theano.gof.python25 import all, any
...@@ -36,6 +37,7 @@ def matVecModM(A, s, m): ...@@ -36,6 +37,7 @@ def matVecModM(A, s, m):
x[i] = r + m x[i] = r + m
return x return x
def multMatVect(v, A, m1, B, m2): def multMatVect(v, A, m1, B, m2):
#multiply the first half of v by A with a modulo of m1 #multiply the first half of v by A with a modulo of m1
#and the second half by B with a modulo of m2 #and the second half by B with a modulo of m2
...@@ -63,25 +65,27 @@ A1p0 = numpy.asarray([[0, 4194304, 129], [1, 0, 0], [0, 1, 0]]) ...@@ -63,25 +65,27 @@ A1p0 = numpy.asarray([[0, 4194304, 129], [1, 0, 0], [0, 1, 0]])
A2p0 = numpy.asarray([[32768, 0, 32769], [1, 0, 0], [0, 1, 0]]) A2p0 = numpy.asarray([[32768, 0, 32769], [1, 0, 0], [0, 1, 0]])
A1p72 = numpy.asarray([[1516919229, 758510237, 499121365], A1p72 = numpy.asarray([[1516919229, 758510237, 499121365],
[1884998244, 1516919229, 335398200], [1884998244, 1516919229, 335398200],
[601897748, 1884998244, 358115744]]) [601897748, 1884998244, 358115744]])
A2p72 = numpy.asarray([[1228857673, 1496414766, 954677935], A2p72 = numpy.asarray([[1228857673, 1496414766, 954677935],
[1133297478, 1407477216, 1496414766], [1133297478, 1407477216, 1496414766],
[2002613992, 1639496704, 1407477216]]) [2002613992, 1639496704, 1407477216]])
A1p134 = numpy.asarray( A1p134 = numpy.asarray(
[[1702500920, 1849582496, 1656874625], [[1702500920, 1849582496, 1656874625],
[828554832, 1702500920, 1512419905], [828554832, 1702500920, 1512419905],
[1143731069, 828554832, 102237247]]) [1143731069, 828554832, 102237247]])
A2p134 = numpy.asarray( A2p134 = numpy.asarray(
[[796789021, 1464208080, 607337906], [[796789021, 1464208080, 607337906],
[1241679051, 1431130166, 1464208080], [1241679051, 1431130166, 1464208080],
[1401213391, 1178684362, 1431130166]]) [1401213391, 1178684362, 1431130166]])
np_int32_vals = [numpy.int32(i) for i in (0, 7, 9, 15, 16, 22, 24)] np_int32_vals = [numpy.int32(i) for i in (0, 7, 9, 15, 16, 22, 24)]
def ff_2p134(rstate): def ff_2p134(rstate):
return multMatVect(rstate, A1p134, M1, A2p134, M2) return multMatVect(rstate, A1p134, M1, A2p134, M2)
def ff_2p72(rstate): def ff_2p72(rstate):
return multMatVect(rstate, A1p72, M1, A2p72, M2) return multMatVect(rstate, A1p72, M1, A2p72, M2)
...@@ -93,8 +97,8 @@ def mrg_next_value(rstate, new_rstate): ...@@ -93,8 +97,8 @@ def mrg_next_value(rstate, new_rstate):
#i0, i7, i9, i15, i16, i22, i24 = [numpy.int32(i) for i in (0, 7, 9, 15, 16, 22, 24)] #i0, i7, i9, i15, i16, i22, i24 = [numpy.int32(i) for i in (0, 7, 9, 15, 16, 22, 24)]
i0, i7, i9, i15, i16, i22, i24 = np_int32_vals i0, i7, i9, i15, i16, i22, i24 = np_int32_vals
#first component #first component
y1 = (((x12 & MASK12) << i22) + (x12 >> i9) y1 = (((x12 & MASK12) << i22) + (x12 >> i9) +
+ ((x13 & MASK13) << i7) + (x13 >> i24)) ((x13 & MASK13) << i7) + (x13 >> i24))
assert type(y1) == numpy.int32 assert type(y1) == numpy.int32
if (y1 < 0 or y1 >= M1): #must also check overflow if (y1 < 0 or y1 >= M1): #must also check overflow
...@@ -135,6 +139,7 @@ def mrg_next_value(rstate, new_rstate): ...@@ -135,6 +139,7 @@ def mrg_next_value(rstate, new_rstate):
else: else:
return (x11 - x21) * NORM return (x11 - x21) * NORM
class mrg_uniform_base(Op): class mrg_uniform_base(Op):
def __init__(self, output_type, inplace=False): def __init__(self, output_type, inplace=False):
Op.__init__(self) Op.__init__(self)
...@@ -145,17 +150,19 @@ class mrg_uniform_base(Op): ...@@ -145,17 +150,19 @@ class mrg_uniform_base(Op):
self.warned_numpy_version = False self.warned_numpy_version = False
def __eq__(self, other): def __eq__(self, other):
return type(self) == type(other) \ return (type(self) == type(other) and
and self.output_type == other.output_type \ self.output_type == other.output_type and
and self.inplace == other.inplace self.inplace == other.inplace)
def __hash__(self): def __hash__(self):
return hash(type(self)) ^ hash(self.output_type) ^ hash(self.inplace) return hash(type(self)) ^ hash(self.output_type) ^ hash(self.inplace)
def __str__(self): def __str__(self):
if self.inplace: if self.inplace:
s = "inplace" s = "inplace"
else: s = "no_inplace" else:
return self.__class__.__name__+"{%s,%s}"%(self.output_type,s) s = "no_inplace"
return self.__class__.__name__ + "{%s,%s}" % (self.output_type, s)
def make_node(self, rstate, size): def make_node(self, rstate, size):
# error checking slightly redundant here, since # error checking slightly redundant here, since
...@@ -163,10 +170,10 @@ class mrg_uniform_base(Op): ...@@ -163,10 +170,10 @@ class mrg_uniform_base(Op):
# #
# call through MRG_RandomStreams instead. # call through MRG_RandomStreams instead.
return Apply(self, return Apply(self,
[rstate, size], [rstate, size],
[rstate.type(), self.output_type()]) [rstate.type(), self.output_type()])
def grad(self,inputs,ograd): def grad(self, inputs, ograd):
return [None for i in inputs] return [None for i in inputs]
def R_op(self, inputs, eval_points): def R_op(self, inputs, eval_points):
...@@ -187,34 +194,35 @@ class mrg_uniform(mrg_uniform_base): ...@@ -187,34 +194,35 @@ class mrg_uniform(mrg_uniform_base):
def perform(self, node, inp, out): def perform(self, node, inp, out):
rstate, size = inp rstate, size = inp
o_rstate, o_sample = out o_rstate, o_sample = out
numpy_version=numpy.__version__.split('.') numpy_version = numpy.__version__.split('.')
if not self.warned_numpy_version and int(numpy_version[0])<=1 and int(numpy_version[1])<3: if not self.warned_numpy_version and int(numpy_version[0]) <= 1 and int(numpy_version[1]) <3 :
print "Warning: you must use numpy version 1.3.0 or higher with the python version of this op. Otherwise numpy leak memory. and numpy" print "Warning: you must use numpy version 1.3.0 or higher with the python version of this op. Otherwise numpy leak memory. and numpy"
self.warned_numpy_version = True self.warned_numpy_version = True
n_elements = 1 n_elements = 1
rstate = numpy.asarray(rstate) # bring state from GPU if necessary rstate = numpy.asarray(rstate) # bring state from GPU if necessary
if not self.inplace: if not self.inplace:
rstate = rstate.copy() rstate = rstate.copy()
for s in size: for s in size:
n_elements *= s n_elements *= s
n_streams,_ = rstate.shape n_streams, _ = rstate.shape
rval = numpy.zeros(n_elements, dtype=self.output_type.dtype) rval = numpy.zeros(n_elements, dtype=self.output_type.dtype)
err_orig = numpy.seterr(over='ignore') err_orig = numpy.seterr(over='ignore')
try: try:
for i in xrange(n_elements): for i in xrange(n_elements):
sample = mrg_next_value(rstate[i%n_streams], rstate[i%n_streams]) sample = mrg_next_value(rstate[i % n_streams],
rstate[i % n_streams])
rval[i] = sample rval[i] = sample
finally: finally:
numpy.seterr(**err_orig) numpy.seterr(**err_orig)
o_rstate[0] = node.outputs[0].type.filter(rstate) # send to GPU if necessary o_rstate[0] = node.outputs[0].type.filter(rstate) # send to GPU if necessary
o_sample[0] = node.outputs[1].type.filter(rval.reshape(size))# send to GPU if necessary o_sample[0] = node.outputs[1].type.filter(rval.reshape(size)) # send to GPU if necessary
def c_code(self, node, name, inp, out, sub): def c_code(self, node, name, inp, out, sub):
rstate, size = inp rstate, size = inp
...@@ -228,7 +236,7 @@ class mrg_uniform(mrg_uniform_base): ...@@ -228,7 +236,7 @@ class mrg_uniform(mrg_uniform_base):
fail = sub['fail'] fail = sub['fail']
if self.output_type.dtype == 'float32': if self.output_type.dtype == 'float32':
otype = 'float' otype = 'float'
NORM = '4.6566126e-10f' #numpy.float32(1.0/(2**31+65)) NORM = '4.6566126e-10f' # numpy.float32(1.0/(2**31+65))
# this was determined by finding the biggest number such that # this was determined by finding the biggest number such that
# numpy.float32(number * M1) < 1.0 # numpy.float32(number * M1) < 1.0
else: else:
...@@ -279,7 +287,7 @@ class mrg_uniform(mrg_uniform_base): ...@@ -279,7 +287,7 @@ class mrg_uniform(mrg_uniform_base):
} }
for (int i = 0; i < %(ndim)s; ++i) for (int i = 0; i < %(ndim)s; ++i)
{ {
odims[i] = ((npy_int32*)(%(size)s->data + %(size)s->strides[0] * i))[0]; odims[i] = ((npy_int32*)(PyArray_BYTES(%(size)s) + PyArray_STRIDES(%(size)s)[0] * i))[0];
n_elements *= odims[i]; n_elements *= odims[i];
must_alloc_sample = must_alloc_sample || (PyArray_DIMS(%(o_sample)s)[i] != odims[i]); must_alloc_sample = must_alloc_sample || (PyArray_DIMS(%(o_sample)s)[i] != odims[i]);
//fprintf(stderr, "size %%i %%i\\n", i, (int)odims[i]); //fprintf(stderr, "size %%i %%i\\n", i, (int)odims[i]);
...@@ -313,8 +321,8 @@ class mrg_uniform(mrg_uniform_base): ...@@ -313,8 +321,8 @@ class mrg_uniform(mrg_uniform_base):
} }
n_streams = PyArray_DIMS(%(o_rstate)s)[0]; n_streams = PyArray_DIMS(%(o_rstate)s)[0];
sample_data = (%(otype)s *) %(o_sample)s->data; sample_data = (%(otype)s *) PyArray_DATA(%(o_sample)s);
state_data = (npy_int32 *) %(o_rstate)s->data; state_data = (npy_int32 *) PyArray_DATA(%(o_rstate)s);
for (int i = 0; i < n_elements; ++i) for (int i = 0; i < n_elements; ++i)
{ {
npy_int32 * state_data_i = state_data + (i%%n_streams)*6; npy_int32 * state_data_i = state_data + (i%%n_streams)*6;
...@@ -392,7 +400,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp): ...@@ -392,7 +400,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
def c_support_code_apply(self, node, nodename): def c_support_code_apply(self, node, nodename):
if self.output_type.dtype == 'float32': if self.output_type.dtype == 'float32':
otype = 'float' otype = 'float'
NORM = '4.6566126e-10f' #numpy.float32(1.0/(2**31+65)) NORM = '4.6566126e-10f' # numpy.float32(1.0/(2**31+65))
# this was determined by finding the biggest number such that # this was determined by finding the biggest number such that
# numpy.float32(number * M1) < 1.0 # numpy.float32(number * M1) < 1.0
else: else:
...@@ -476,7 +484,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp): ...@@ -476,7 +484,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
} }
} }
""" %locals() """ % locals()
def c_code(self, node, nodename, inp, out, sub): def c_code(self, node, nodename, inp, out, sub):
rstate, size = inp rstate, size = inp
...@@ -491,7 +499,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp): ...@@ -491,7 +499,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
else: else:
otype = 'double' otype = 'double'
SYNC="CNDA_THREAD_SYNC"; SYNC = "CNDA_THREAD_SYNC"
return """ return """
//////// <code generated by mrg_uniform> //////// <code generated by mrg_uniform>
...@@ -521,7 +529,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp): ...@@ -521,7 +529,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
} }
for (int i = 0; i < %(ndim)s; ++i) for (int i = 0; i < %(ndim)s; ++i)
{ {
odims[i] = ((npy_int32*)(%(size)s->data + %(size)s->strides[0] * i))[0]; odims[i] = ((npy_int32*)(PyArray_BYTES(%(size)s) + PyArray_STRIDES(%(size)s)[0] * i))[0];
n_elements *= odims[i]; n_elements *= odims[i];
must_alloc_sample = (must_alloc_sample must_alloc_sample = (must_alloc_sample
|| CudaNdarray_HOST_DIMS(%(o_sample)s)[i] != odims[i]); || CudaNdarray_HOST_DIMS(%(o_sample)s)[i] != odims[i]);
...@@ -593,7 +601,8 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp): ...@@ -593,7 +601,8 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
} }
//////// </ code generated by mrg_uniform> //////// </ code generated by mrg_uniform>
""" %locals() """ % locals()
def c_code_cache_version(self): def c_code_cache_version(self):
return (7,) return (7,)
...@@ -662,7 +671,7 @@ class MRG_RandomStreams(object): ...@@ -662,7 +671,7 @@ class MRG_RandomStreams(object):
elif seed >= M2: elif seed >= M2:
raise ValueError('seed should be less than %i' % M2, seed) raise ValueError('seed should be less than %i' % M2, seed)
self.rstate = numpy.asarray([seed]*6, dtype='int32') self.rstate = numpy.asarray([seed]*6, dtype='int32')
elif len(seed)==6: elif len(seed) == 6:
if seed[0] == 0 and seed[1] == 0 and seed[2] == 0: if seed[0] == 0 and seed[1] == 0 and seed[2] == 0:
raise ValueError('The first 3 values of seed should not be all 0', seed) raise ValueError('The first 3 values of seed should not be all 0', seed)
if seed[3] == 0 and seed[4] == 0 and seed[5] == 0: if seed[3] == 0 and seed[4] == 0 and seed[5] == 0:
...@@ -690,7 +699,7 @@ class MRG_RandomStreams(object): ...@@ -690,7 +699,7 @@ class MRG_RandomStreams(object):
""" """
assert n_streams < 2**72 assert n_streams < 2**72
assert n_streams > 0 assert n_streams > 0
rval = numpy.zeros((n_streams,6), dtype='int32') rval = numpy.zeros((n_streams, 6), dtype='int32')
rval[0] = self.rstate rval[0] = self.rstate
for i in xrange(1, n_streams): for i in xrange(1, n_streams):
rval[i] = ff_2p72(rval[i - 1]) rval[i] = ff_2p72(rval[i - 1])
...@@ -751,8 +760,8 @@ class MRG_RandomStreams(object): ...@@ -751,8 +760,8 @@ class MRG_RandomStreams(object):
else: else:
if not (isinstance(size, Variable) and size.ndim == 1): if not (isinstance(size, Variable) and size.ndim == 1):
raise TypeError("size must be a tuple of int or a Theano " raise TypeError("size must be a tuple of int or a Theano "
"Variable with 1 dimension, got " + str(size) + "Variable with 1 dimension, got " + str(size) +
" of type " + str(type(size))) " of type " + str(type(size)))
if nstreams is None: if nstreams is None:
nstreams = self.n_streams(size) nstreams = self.n_streams(size)
...@@ -776,20 +785,22 @@ class MRG_RandomStreams(object): ...@@ -776,20 +785,22 @@ class MRG_RandomStreams(object):
# currently no Theano node that will do a frombuffer # currently no Theano node that will do a frombuffer
# reinterpretation. # reinterpretation.
u = self.pretty_return(node_rstate, u = self.pretty_return(node_rstate,
*GPU_mrg_uniform.new(node_rstate, ndim, dtype, size)) *GPU_mrg_uniform.new(node_rstate,
ndim, dtype, size))
else: else:
node_rstate = shared(self.get_substream_rstates(nstreams)) node_rstate = shared(self.get_substream_rstates(nstreams))
u = self.pretty_return(node_rstate, u = self.pretty_return(node_rstate,
*mrg_uniform.new(node_rstate, ndim, dtype, size)) *mrg_uniform.new(node_rstate,
ndim, dtype, size))
r = u * (high - low) + low r = u * (high - low) + low
if u.type.broadcastable != r.type.broadcastable: if u.type.broadcastable != r.type.broadcastable:
raise NotImplementedError( raise NotImplementedError(
'Increase the size to match the broadcasting pattern of ' 'Increase the size to match the broadcasting pattern of '
'`low` and `high` arguments') '`low` and `high` arguments')
assert r.dtype == dtype assert r.dtype == dtype
return r return r
def binomial(self, size=None, n=1, p=0.5, ndim=None, dtype='int64', def binomial(self, size=None, n=1, p=0.5, ndim=None, dtype='int64',
nstreams=None): nstreams=None):
...@@ -934,4 +945,6 @@ def mrg_random_make_inplace(node): ...@@ -934,4 +945,6 @@ def mrg_random_make_inplace(node):
new_op = op.__class__(op.output_type, inplace=True) new_op = op.__class__(op.output_type, inplace=True)
return new_op.make_node(*node.inputs).outputs return new_op.make_node(*node.inputs).outputs
return False return False
optdb.register('random_make_inplace_mrg', opt.in2out(mrg_random_make_inplace, ignore_newtrees=True), 99, 'fast_run', 'inplace') optdb.register('random_make_inplace_mrg',
opt.in2out(mrg_random_make_inplace, ignore_newtrees=True),
99, 'fast_run', 'inplace')
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -62,7 +62,7 @@ import copy ...@@ -62,7 +62,7 @@ import copy
def get_version(): def get_version():
return 0.278 return 0.279
@cython.boundscheck(False) @cython.boundscheck(False)
def perform( def perform(
......
...@@ -11,7 +11,7 @@ _logger = logging.getLogger('theano.scan_module.scan_perform') ...@@ -11,7 +11,7 @@ _logger = logging.getLogger('theano.scan_module.scan_perform')
_logger.setLevel(logging.WARN) _logger.setLevel(logging.WARN)
version = 0.278 # must match constant returned in function get_version() version = 0.280 # must match constant returned in function get_version() -- NOTE(review): the get_version() hunk shown earlier in this diff returns 0.279, not 0.280; confirm the two constants agree
need_reload = False need_reload = False
...@@ -52,11 +52,8 @@ except ImportError: ...@@ -52,11 +52,8 @@ except ImportError:
_logger.info("Compiling C code for scan") _logger.info("Compiling C code for scan")
dirname = 'scan_perform' dirname = 'scan_perform'
# We use a .txt extensions as otherwise it don't get
# included when we create a package to send to pypi
# This happen even if we tell to include *.c files
cfile = os.path.join(theano.__path__[0], 'scan_module', cfile = os.path.join(theano.__path__[0], 'scan_module',
'scan_perform.c.txt') 'scan_perform.c')
code = open(cfile).read() code = open(cfile).read()
loc = os.path.join(config.compiledir, dirname) loc = os.path.join(config.compiledir, dirname)
if not os.path.exists(loc): if not os.path.exists(loc):
......
...@@ -1795,9 +1795,9 @@ class AddSD(gof.op.Op): ...@@ -1795,9 +1795,9 @@ class AddSD(gof.op.Op):
} }
npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1; npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1;
const npy_int32 * __restrict__ indptr = (npy_int32 *)%(_indptr)s->data; const npy_int32 * __restrict__ indptr = (npy_int32 *)PyArray_DATA(%(_indptr)s);
const npy_int32 * __restrict__ indices = (npy_int32*)%(_indices)s->data; const npy_int32 * __restrict__ indices = (npy_int32*)PyArray_DATA(%(_indices)s);
const dtype_%(_data)s* __restrict__ data = (dtype_%(_data)s*)%(_data)s->data; const dtype_%(_data)s* __restrict__ data = (dtype_%(_data)s*)PyArray_DATA(%(_data)s);
dtype_%(y)s* ydata = (dtype_%(y)s*)PyArray_DATA(%(y)s); dtype_%(y)s* ydata = (dtype_%(y)s*)PyArray_DATA(%(y)s);
dtype_%(z)s* zdata = (dtype_%(z)s*)PyArray_DATA(%(z)s); dtype_%(z)s* zdata = (dtype_%(z)s*)PyArray_DATA(%(z)s);
...@@ -2983,10 +2983,10 @@ class StructuredDotGradCSC(gof.Op): ...@@ -2983,10 +2983,10 @@ class StructuredDotGradCSC(gof.Op):
if (PyArray_NDIM(%(_indices)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1"); %(fail)s;} if (PyArray_NDIM(%(_indices)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1"); %(fail)s;}
if (PyArray_NDIM(%(_indptr)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1"); %(fail)s;} if (PyArray_NDIM(%(_indptr)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1"); %(fail)s;}
if( PyArray_DESCR(%(_indices)s)->type_num != NPY_INT32) { if( PyArray_TYPE(%(_indices)s) != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;}
if( PyArray_DESCR(%(_indptr)s)->type_num != NPY_INT32) if( PyArray_TYPE(%(_indptr)s) != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;}
if( PyArray_DIMS(%(_d)s)[1] != PyArray_DIMS(%(_g)s)[1]) if( PyArray_DIMS(%(_d)s)[1] != PyArray_DIMS(%(_g)s)[1])
...@@ -2996,29 +2996,29 @@ class StructuredDotGradCSC(gof.Op): ...@@ -2996,29 +2996,29 @@ class StructuredDotGradCSC(gof.Op):
|| (PyArray_DIMS(%(_zout)s)[0] != PyArray_DIMS(%(_indices)s)[0])) || (PyArray_DIMS(%(_zout)s)[0] != PyArray_DIMS(%(_indices)s)[0]))
{ {
Py_XDECREF(%(_zout)s); Py_XDECREF(%(_zout)s);
%(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, PyArray_DIMS(%(_indices)s), PyArray_DESCR(%(_g)s)->type_num); %(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, PyArray_DIMS(%(_indices)s), PyArray_TYPE(%(_g)s));
} }
{ //makes it compile even though labels jump over variable definitions. { //makes it compile even though labels jump over variable definitions.
npy_intp nnz = PyArray_DIMS(%(_indices)s)[0]; npy_intp nnz = PyArray_DIMS(%(_indices)s)[0];
npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1; //TODO: error checking with this npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1; //TODO: error checking with this
npy_intp Sindices = %(_indices)s->strides[0]/PyArray_DESCR(%(_indices)s)->elsize; npy_intp Sindices = PyArray_STRIDES(%(_indices)s)[0]/PyArray_DESCR(%(_indices)s)->elsize;
npy_intp Sindptr = %(_indptr)s->strides[0]/PyArray_DESCR(%(_indptr)s)->elsize; npy_intp Sindptr = PyArray_STRIDES(%(_indptr)s)[0]/PyArray_DESCR(%(_indptr)s)->elsize;
const npy_intp Sd1 = %(_d)s->strides[1]/PyArray_DESCR(%(_d)s)->elsize; const npy_intp Sd1 = PyArray_STRIDES(%(_d)s)[1]/PyArray_DESCR(%(_d)s)->elsize;
const npy_intp Sg1 = %(_g)s->strides[1]/PyArray_DESCR(%(_g)s)->elsize; const npy_intp Sg1 = PyArray_STRIDES(%(_g)s)[1]/PyArray_DESCR(%(_g)s)->elsize;
const npy_intp K = PyArray_DIMS(%(_d)s)[1]; const npy_intp K = PyArray_DIMS(%(_d)s)[1];
const npy_int32 * __restrict__ indptr = (npy_int32 *)%(_indptr)s->data; const npy_int32 * __restrict__ indptr = (npy_int32 *)PyArray_DATA(%(_indptr)s);
const npy_int32 * __restrict__ indices = (npy_int32 *)%(_indices)s->data; const npy_int32 * __restrict__ indices = (npy_int32 *)PyArray_DATA(%(_indices)s);
// loop over columns // loop over columns
for (npy_int32 j = 0; j < N; ++j) for (npy_int32 j = 0; j < N; ++j)
{ {
// extract j-th row of dense matrix // extract j-th row of dense matrix
const dtype_%(_d)s* __restrict__ d_row = (dtype_%(_d)s*)(%(_d)s->data + %(_d)s->strides[0] * j); const dtype_%(_d)s* __restrict__ d_row = (dtype_%(_d)s*)(PyArray_BYTES(%(_d)s) + PyArray_STRIDES(%(_d)s)[0] * j);
if(j >= PyArray_DIMS(%(_d)s)[0]) {PyErr_SetString(PyExc_NotImplementedError, "G"); %(fail)s;} if(j >= PyArray_DIMS(%(_d)s)[0]) {PyErr_SetString(PyExc_NotImplementedError, "G"); %(fail)s;}
// for each non-null value in the sparse column // for each non-null value in the sparse column
...@@ -3028,7 +3028,7 @@ class StructuredDotGradCSC(gof.Op): ...@@ -3028,7 +3028,7 @@ class StructuredDotGradCSC(gof.Op):
npy_int32 i = indices[i_idx * Sindices]; npy_int32 i = indices[i_idx * Sindices];
// extract corresponding row in gradient // extract corresponding row in gradient
const dtype_%(_g)s* __restrict__ g_row = (dtype_%(_g)s*)(%(_g)s->data + %(_g)s->strides[0] * i); const dtype_%(_g)s* __restrict__ g_row = (dtype_%(_g)s*)(PyArray_BYTES(%(_g)s) + PyArray_STRIDES(%(_g)s)[0] * i);
double ip = 0.0; double ip = 0.0;
// make sure that row index is not bigger than actual number of rows // make sure that row index is not bigger than actual number of rows
...@@ -3044,7 +3044,7 @@ class StructuredDotGradCSC(gof.Op): ...@@ -3044,7 +3044,7 @@ class StructuredDotGradCSC(gof.Op):
} }
// write resulting gradient to sparse output // write resulting gradient to sparse output
((dtype_%(_zout)s* __restrict__)(%(_zout)s->data + i_idx * %(_zout)s->strides[0]))[0] = ip; ((dtype_%(_zout)s* __restrict__)(PyArray_BYTES(%(_zout)s) + i_idx * PyArray_STRIDES(%(_zout)s)[0]))[0] = ip;
} }
} }
} }
...@@ -3119,10 +3119,10 @@ class StructuredDotGradCSR(gof.Op): ...@@ -3119,10 +3119,10 @@ class StructuredDotGradCSR(gof.Op):
if (PyArray_NDIM(%(_indices)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1"); %(fail)s;} if (PyArray_NDIM(%(_indices)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1"); %(fail)s;}
if (PyArray_NDIM(%(_indptr)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1"); %(fail)s;} if (PyArray_NDIM(%(_indptr)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1"); %(fail)s;}
if( PyArray_DESCR(%(_indices)s)->type_num != NPY_INT32) { if( PyArray_TYPE(%(_indices)s) != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;}
if( PyArray_DESCR(%(_indptr)s)->type_num != NPY_INT32) if( PyArray_TYPE(%(_indptr)s) != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;}
if( PyArray_DIMS(%(_d)s)[1] != PyArray_DIMS(%(_g)s)[1]) if( PyArray_DIMS(%(_d)s)[1] != PyArray_DIMS(%(_g)s)[1])
...@@ -3132,7 +3132,7 @@ class StructuredDotGradCSR(gof.Op): ...@@ -3132,7 +3132,7 @@ class StructuredDotGradCSR(gof.Op):
|| (PyArray_DIMS(%(_zout)s)[0] != PyArray_DIMS(%(_indices)s)[0])) || (PyArray_DIMS(%(_zout)s)[0] != PyArray_DIMS(%(_indices)s)[0]))
{ {
Py_XDECREF(%(_zout)s); Py_XDECREF(%(_zout)s);
%(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, PyArray_DIMS(%(_indices)s), PyArray_DESCR(%(_g)s)->type_num); %(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, PyArray_DIMS(%(_indices)s), PyArray_TYPE(%(_g)s));
} }
{ //makes it compile even though labels jump over variable definitions. { //makes it compile even though labels jump over variable definitions.
...@@ -3140,16 +3140,16 @@ class StructuredDotGradCSR(gof.Op): ...@@ -3140,16 +3140,16 @@ class StructuredDotGradCSR(gof.Op):
// extract number of rows // extract number of rows
npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1; //TODO: error checking with this npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1; //TODO: error checking with this
npy_intp Sindices = %(_indices)s->strides[0]/PyArray_DESCR(%(_indices)s)->elsize; npy_intp Sindices = PyArray_STRIDES(%(_indices)s)[0]/PyArray_DESCR(%(_indices)s)->elsize;
npy_intp Sindptr = %(_indptr)s->strides[0]/PyArray_DESCR(%(_indptr)s)->elsize; npy_intp Sindptr = PyArray_STRIDES(%(_indptr)s)[0]/PyArray_DESCR(%(_indptr)s)->elsize;
const npy_intp Sd1 = %(_d)s->strides[1]/PyArray_DESCR(%(_d)s)->elsize; const npy_intp Sd1 = PyArray_STRIDES(%(_d)s)[1]/PyArray_DESCR(%(_d)s)->elsize;
const npy_intp Sg1 = %(_g)s->strides[1]/PyArray_DESCR(%(_g)s)->elsize; const npy_intp Sg1 = PyArray_STRIDES(%(_g)s)[1]/PyArray_DESCR(%(_g)s)->elsize;
const npy_intp K = PyArray_DIMS(%(_d)s)[1]; const npy_intp K = PyArray_DIMS(%(_d)s)[1];
const npy_int32 * __restrict__ indptr = (npy_int32 *)%(_indptr)s->data; const npy_int32 * __restrict__ indptr = (npy_int32 *)PyArray_DATA(%(_indptr)s);
const npy_int32 * __restrict__ indices = (npy_int32 *)%(_indices)s->data; const npy_int32 * __restrict__ indices = (npy_int32 *)PyArray_DATA(%(_indices)s);
// loop over columns of sparse matrix // loop over columns of sparse matrix
for (npy_int32 i = 0; i < N; ++i) for (npy_int32 i = 0; i < N; ++i)
...@@ -3161,11 +3161,11 @@ class StructuredDotGradCSR(gof.Op): ...@@ -3161,11 +3161,11 @@ class StructuredDotGradCSR(gof.Op):
npy_int32 j = indices[j_idx * Sindices]; npy_int32 j = indices[j_idx * Sindices];
// extract j-th row of dense matrix // extract j-th row of dense matrix
const dtype_%(_d)s* __restrict__ d_row = (dtype_%(_d)s*)(%(_d)s->data + %(_d)s->strides[0] * j); const dtype_%(_d)s* __restrict__ d_row = (dtype_%(_d)s*)(PyArray_BYTES(%(_d)s) + PyArray_STRIDES(%(_d)s)[0] * j);
if(j >= PyArray_DIMS(%(_d)s)[0]) {PyErr_SetString(PyExc_NotImplementedError, "G"); %(fail)s;} if(j >= PyArray_DIMS(%(_d)s)[0]) {PyErr_SetString(PyExc_NotImplementedError, "G"); %(fail)s;}
// extract corresponding row in gradient // extract corresponding row in gradient
const dtype_%(_g)s* __restrict__ g_row = (dtype_%(_g)s*)(%(_g)s->data + %(_g)s->strides[0] * i); const dtype_%(_g)s* __restrict__ g_row = (dtype_%(_g)s*)(PyArray_BYTES(%(_g)s) + PyArray_STRIDES(%(_g)s)[0] * i);
double ip = 0.0; double ip = 0.0;
// make sure that row index is not bigger than actual number of rows // make sure that row index is not bigger than actual number of rows
...@@ -3181,7 +3181,7 @@ class StructuredDotGradCSR(gof.Op): ...@@ -3181,7 +3181,7 @@ class StructuredDotGradCSR(gof.Op):
} }
// write resulting gradient to sparse output // write resulting gradient to sparse output
((dtype_%(_zout)s* __restrict__)(%(_zout)s->data + j_idx * %(_zout)s->strides[0]))[0] = ip; ((dtype_%(_zout)s* __restrict__)(PyArray_BYTES(%(_zout)s) + j_idx * PyArray_STRIDES(%(_zout)s)[0]))[0] = ip;
} }
} }
} }
......
...@@ -142,19 +142,19 @@ class StructuredDotCSC(gof.Op): ...@@ -142,19 +142,19 @@ class StructuredDotCSC(gof.Op):
if (PyArray_NDIM(%(a_nrows)s) != 0) {PyErr_SetString(PyExc_NotImplementedError, "rank(nrows) != 0"); %(fail)s;} if (PyArray_NDIM(%(a_nrows)s) != 0) {PyErr_SetString(PyExc_NotImplementedError, "rank(nrows) != 0"); %(fail)s;}
if (PyArray_NDIM(%(b)s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2"); %(fail)s;} if (PyArray_NDIM(%(b)s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2"); %(fail)s;}
if (PyArray_DESCR(%(a_val)s)->type_num != %(typenum_a_val)s) { if (PyArray_TYPE(%(a_val)s) != %(typenum_a_val)s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for a_val"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "Invalid type for a_val"); %(fail)s;}
if (PyArray_DESCR(%(b)s)->type_num != %(typenum_b)s) { if (PyArray_TYPE(%(b)s) != %(typenum_b)s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for b"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "Invalid type for b"); %(fail)s;}
if (PyArray_DESCR(%(a_ind)s)->type_num != NPY_INT32) { if (PyArray_TYPE(%(a_ind)s) != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32"); %(fail)s;}
if (PyArray_DESCR(%(a_ptr)s)->type_num != NPY_INT32) if (PyArray_TYPE(%(a_ptr)s) != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32"); %(fail)s;}
if (PyArray_DESCR(%(a_nrows)s)->type_num != NPY_INT32) if (PyArray_TYPE(%(a_nrows)s) != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "a_nrows dtype not INT32"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "a_nrows dtype not INT32"); %(fail)s;}
if (PyArray_DIMS(%(a_val)s)[0] != PyArray_DIMS(%(a_ind)s)[0]) if (PyArray_DIMS(%(a_val)s)[0] != PyArray_DIMS(%(a_ind)s)[0])
...@@ -164,13 +164,13 @@ class StructuredDotCSC(gof.Op): ...@@ -164,13 +164,13 @@ class StructuredDotCSC(gof.Op):
{PyErr_SetString(PyExc_NotImplementedError, "a's number of columns doesn't match b's rows"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "a's number of columns doesn't match b's rows"); %(fail)s;}
if ((!%(z)s) if ((!%(z)s)
|| (PyArray_DIMS(%(z)s)[0] != ((npy_int32 *)%(a_nrows)s->data)[0]) || (PyArray_DIMS(%(z)s)[0] != ((npy_int32 *)PyArray_DATA(%(a_nrows)s))[0])
|| (PyArray_DIMS(%(z)s)[1] != PyArray_DIMS(%(b)s)[1]) || (PyArray_DIMS(%(z)s)[1] != PyArray_DIMS(%(b)s)[1])
) )
{ {
{Py_XDECREF(%(z)s);} {Py_XDECREF(%(z)s);}
npy_intp dims[] = {0, 0}; npy_intp dims[] = {0, 0};
dims[0] = ((npy_int32 *)%(a_nrows)s->data)[0]; dims[0] = ((npy_int32 *)PyArray_DATA(%(a_nrows)s))[0];
dims[1] = PyArray_DIMS(%(b)s)[1]; dims[1] = PyArray_DIMS(%(b)s)[1];
%(z)s = (PyArrayObject*) PyArray_SimpleNew(2, dims, %(typenum_z)s); %(z)s = (PyArrayObject*) PyArray_SimpleNew(2, dims, %(typenum_z)s);
} }
...@@ -182,19 +182,19 @@ class StructuredDotCSC(gof.Op): ...@@ -182,19 +182,19 @@ class StructuredDotCSC(gof.Op):
npy_intp K = PyArray_DIMS(%(b)s)[0]; npy_intp K = PyArray_DIMS(%(b)s)[0];
// strides tell you how many bytes to skip to go to next column/row entry // strides tell you how many bytes to skip to go to next column/row entry
npy_intp Szm = %(z)s->strides[0] / PyArray_DESCR(%(z)s)->elsize; npy_intp Szm = PyArray_STRIDES(%(z)s)[0] / PyArray_DESCR(%(z)s)->elsize;
npy_intp Szn = %(z)s->strides[1] / PyArray_DESCR(%(z)s)->elsize; npy_intp Szn = PyArray_STRIDES(%(z)s)[1] / PyArray_DESCR(%(z)s)->elsize;
//npy_intp Sbm = %(b)s->strides[0] / PyArray_DESCR(%(b)s)->elsize; //npy_intp Sbm = PyArray_STRIDES(%(b)s)[0] / PyArray_DESCR(%(b)s)->elsize;
npy_intp Sbn = %(b)s->strides[1] / PyArray_DESCR(%(b)s)->elsize; npy_intp Sbn = PyArray_STRIDES(%(b)s)[1] / PyArray_DESCR(%(b)s)->elsize;
npy_intp Sval = %(a_val)s->strides[0] / PyArray_DESCR(%(a_val)s)->elsize; npy_intp Sval = PyArray_STRIDES(%(a_val)s)[0] / PyArray_DESCR(%(a_val)s)->elsize;
npy_intp Sind = %(a_ind)s->strides[0] / PyArray_DESCR(%(a_ind)s)->elsize; npy_intp Sind = PyArray_STRIDES(%(a_ind)s)[0] / PyArray_DESCR(%(a_ind)s)->elsize;
npy_intp Sptr = %(a_ptr)s->strides[0] / PyArray_DESCR(%(a_ptr)s)->elsize; npy_intp Sptr = PyArray_STRIDES(%(a_ptr)s)[0] / PyArray_DESCR(%(a_ptr)s)->elsize;
// pointers to access actual data in the arrays passed as params. // pointers to access actual data in the arrays passed as params.
dtype_%(z)s* __restrict__ Dz = (dtype_%(z)s*)%(z)s->data; dtype_%(z)s* __restrict__ Dz = (dtype_%(z)s*)PyArray_DATA(%(z)s);
const dtype_%(a_val)s* __restrict__ Dval = (dtype_%(a_val)s*)%(a_val)s->data; const dtype_%(a_val)s* __restrict__ Dval = (dtype_%(a_val)s*)PyArray_DATA(%(a_val)s);
const npy_int32 * __restrict__ Dind = (npy_int32*)%(a_ind)s->data; const npy_int32 * __restrict__ Dind = (npy_int32*)PyArray_DATA(%(a_ind)s);
const npy_int32 * __restrict__ Dptr = (npy_int32*)%(a_ptr)s->data; const npy_int32 * __restrict__ Dptr = (npy_int32*)PyArray_DATA(%(a_ptr)s);
//npy_intp nnz = PyArray_DIMS(%(a_ind)s)[0]; //npy_intp nnz = PyArray_DIMS(%(a_ind)s)[0];
...@@ -218,7 +218,7 @@ class StructuredDotCSC(gof.Op): ...@@ -218,7 +218,7 @@ class StructuredDotCSC(gof.Op):
for (npy_int32 k = 0; k < K; ++k) for (npy_int32 k = 0; k < K; ++k)
{ {
// get pointer to k-th row of dense matrix // get pointer to k-th row of dense matrix
const dtype_%(b)s* __restrict__ bk = (dtype_%(b)s*)(%(b)s->data + %(b)s->strides[0] * k); const dtype_%(b)s* __restrict__ bk = (dtype_%(b)s*)(PyArray_BYTES(%(b)s) + PyArray_STRIDES(%(b)s)[0] * k);
// loop over sparse column indices through index pointer array // loop over sparse column indices through index pointer array
// (amounts to looping over rows M of sparse matrix) // (amounts to looping over rows M of sparse matrix)
...@@ -229,7 +229,7 @@ class StructuredDotCSC(gof.Op): ...@@ -229,7 +229,7 @@ class StructuredDotCSC(gof.Op):
const dtype_%(a_val)s Amk = Dval[m_idx * Sval]; // actual value at that location const dtype_%(a_val)s Amk = Dval[m_idx * Sval]; // actual value at that location
// pointer to m-th row of the output matrix Z // pointer to m-th row of the output matrix Z
dtype_%(z)s* __restrict__ zm = (dtype_%(z)s*)(%(z)s->data + %(z)s->strides[0] * m); dtype_%(z)s* __restrict__ zm = (dtype_%(z)s*)(PyArray_BYTES(%(z)s) + PyArray_STRIDES(%(z)s)[0] * m);
//RESOLVE: a.shape[0] equals z.shape[0], why is this not an equality constraint? //RESOLVE: a.shape[0] equals z.shape[0], why is this not an equality constraint?
if (m >= PyArray_DIMS(%(z)s)[0]) if (m >= PyArray_DIMS(%(z)s)[0])
...@@ -330,10 +330,10 @@ class StructuredDotCSR(gof.Op): ...@@ -330,10 +330,10 @@ class StructuredDotCSR(gof.Op):
if (PyArray_NDIM(%(a_ptr)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ptr) != 1"); %(fail)s;} if (PyArray_NDIM(%(a_ptr)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ptr) != 1"); %(fail)s;}
if (PyArray_NDIM(%(b)s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2"); %(fail)s;} if (PyArray_NDIM(%(b)s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2"); %(fail)s;}
if (PyArray_DESCR(%(a_ind)s)->type_num != NPY_INT32) { if (PyArray_TYPE(%(a_ind)s) != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32"); %(fail)s;}
if (PyArray_DESCR(%(a_ptr)s)->type_num != NPY_INT32) if (PyArray_TYPE(%(a_ptr)s) != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32"); %(fail)s;}
if (PyArray_DIMS(%(a_val)s)[0] != PyArray_DIMS(%(a_ind)s)[0]) if (PyArray_DIMS(%(a_val)s)[0] != PyArray_DIMS(%(a_ind)s)[0])
...@@ -358,19 +358,19 @@ class StructuredDotCSR(gof.Op): ...@@ -358,19 +358,19 @@ class StructuredDotCSR(gof.Op):
npy_intp K = PyArray_DIMS(%(b)s)[0]; npy_intp K = PyArray_DIMS(%(b)s)[0];
// strides tell you how many bytes to skip to go to next column/row entry // strides tell you how many bytes to skip to go to next column/row entry
npy_intp Szm = %(z)s->strides[0] / PyArray_DESCR(%(z)s)->elsize; npy_intp Szm = PyArray_STRIDES(%(z)s)[0] / PyArray_DESCR(%(z)s)->elsize;
npy_intp Szn = %(z)s->strides[1] / PyArray_DESCR(%(z)s)->elsize; npy_intp Szn = PyArray_STRIDES(%(z)s)[1] / PyArray_DESCR(%(z)s)->elsize;
npy_intp Sbm = %(b)s->strides[0] / PyArray_DESCR(%(b)s)->elsize; npy_intp Sbm = PyArray_STRIDES(%(b)s)[0] / PyArray_DESCR(%(b)s)->elsize;
npy_intp Sbn = %(b)s->strides[1] / PyArray_DESCR(%(b)s)->elsize; npy_intp Sbn = PyArray_STRIDES(%(b)s)[1] / PyArray_DESCR(%(b)s)->elsize;
npy_intp Sval = %(a_val)s->strides[0] / PyArray_DESCR(%(a_val)s)->elsize; npy_intp Sval = PyArray_STRIDES(%(a_val)s)[0] / PyArray_DESCR(%(a_val)s)->elsize;
npy_intp Sind = %(a_ind)s->strides[0] / PyArray_DESCR(%(a_ind)s)->elsize; npy_intp Sind = PyArray_STRIDES(%(a_ind)s)[0] / PyArray_DESCR(%(a_ind)s)->elsize;
npy_intp Sptr = %(a_ptr)s->strides[0] / PyArray_DESCR(%(a_ptr)s)->elsize; npy_intp Sptr = PyArray_STRIDES(%(a_ptr)s)[0] / PyArray_DESCR(%(a_ptr)s)->elsize;
// pointers to access actual data in the arrays passed as params. // pointers to access actual data in the arrays passed as params.
dtype_%(z)s* __restrict__ Dz = (dtype_%(z)s*)%(z)s->data; dtype_%(z)s* __restrict__ Dz = (dtype_%(z)s*)PyArray_DATA(%(z)s);
const dtype_%(a_val)s* __restrict__ Dval = (dtype_%(a_val)s*)%(a_val)s->data; const dtype_%(a_val)s* __restrict__ Dval = (dtype_%(a_val)s*)PyArray_DATA(%(a_val)s);
const npy_int32 * __restrict__ Dind = (npy_int32*)%(a_ind)s->data; const npy_int32 * __restrict__ Dind = (npy_int32*)PyArray_DATA(%(a_ind)s);
const npy_int32 * __restrict__ Dptr = (npy_int32*)%(a_ptr)s->data; const npy_int32 * __restrict__ Dptr = (npy_int32*)PyArray_DATA(%(a_ptr)s);
//npy_intp nnz = PyArray_DIMS(%(a_ind)s)[0]; //npy_intp nnz = PyArray_DIMS(%(a_ind)s)[0];
...@@ -393,7 +393,7 @@ class StructuredDotCSR(gof.Op): ...@@ -393,7 +393,7 @@ class StructuredDotCSR(gof.Op):
for (npy_int64 m = 0; m < M; ++m) for (npy_int64 m = 0; m < M; ++m)
{ {
// pointer to m-th row of the output matrix Z // pointer to m-th row of the output matrix Z
dtype_%(z)s* __restrict__ zm = (dtype_%(z)s*)(%(z)s->data + %(z)s->strides[0] * m); dtype_%(z)s* __restrict__ zm = (dtype_%(z)s*)(PyArray_BYTES(%(z)s) + PyArray_STRIDES(%(z)s)[0] * m);
// loop over sparse rows indices through index pointer array // loop over sparse rows indices through index pointer array
// (amounts to looping over cols k of sparse matrix) // (amounts to looping over cols k of sparse matrix)
...@@ -403,7 +403,7 @@ class StructuredDotCSR(gof.Op): ...@@ -403,7 +403,7 @@ class StructuredDotCSR(gof.Op):
const dtype_%(a_val)s Amk = Dval[k_idx * Sval]; // actual value at that location const dtype_%(a_val)s Amk = Dval[k_idx * Sval]; // actual value at that location
// get pointer to k-th row of dense matrix // get pointer to k-th row of dense matrix
const dtype_%(b)s* __restrict__ bk = (dtype_%(b)s*)(%(b)s->data + %(b)s->strides[0] * k); const dtype_%(b)s* __restrict__ bk = (dtype_%(b)s*)(PyArray_BYTES(%(b)s) + PyArray_STRIDES(%(b)s)[0] * k);
// loop over final dimension (cols of dense matrix) and perform dot product // loop over final dimension (cols of dense matrix) and perform dot product
for(npy_int32 n = 0; n < N; ++n) for(npy_int32 n = 0; n < N; ++n)
...@@ -566,25 +566,25 @@ class UsmmCscDense(gof.Op): ...@@ -566,25 +566,25 @@ class UsmmCscDense(gof.Op):
if (PyArray_NDIM(%(x_nrows)s) != 0) {PyErr_SetString(PyExc_NotImplementedError, "rank(nrows) != 0"); %(fail)s;} if (PyArray_NDIM(%(x_nrows)s) != 0) {PyErr_SetString(PyExc_NotImplementedError, "rank(nrows) != 0"); %(fail)s;}
if (PyArray_NDIM(%(y)s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;} if (PyArray_NDIM(%(y)s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
if (PyArray_DESCR(%(x_val)s)->type_num != %(typenum_x_val)s) { if (PyArray_TYPE(%(x_val)s) != %(typenum_x_val)s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for x_val"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "Invalid type for x_val"); %(fail)s;}
if (PyArray_DESCR(%(y)s)->type_num != %(typenum_y)s) { if (PyArray_TYPE(%(y)s) != %(typenum_y)s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for y"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "Invalid type for y"); %(fail)s;}
if (PyArray_DESCR(%(z)s)->type_num != %(typenum_z)s) { if (PyArray_TYPE(%(z)s) != %(typenum_z)s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for z"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "Invalid type for z"); %(fail)s;}
if (PyArray_DESCR(%(alpha)s)->type_num != %(typenum_alpha)s) { if (PyArray_TYPE(%(alpha)s) != %(typenum_alpha)s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for alpha"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "Invalid type for alpha"); %(fail)s;}
if (PyArray_DESCR(%(x_ind)s)->type_num != NPY_INT32) { if (PyArray_TYPE(%(x_ind)s) != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "x_ind dtype not INT32"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "x_ind dtype not INT32"); %(fail)s;}
if (PyArray_DESCR(%(x_ptr)s)->type_num != NPY_INT32) if (PyArray_TYPE(%(x_ptr)s) != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "x_ptr dtype not INT32"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "x_ptr dtype not INT32"); %(fail)s;}
if (PyArray_DESCR(%(x_nrows)s)->type_num != NPY_INT32) if (PyArray_TYPE(%(x_nrows)s) != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "x_nrows dtype not INT32"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "x_nrows dtype not INT32"); %(fail)s;}
if (PyArray_DIMS(%(x_val)s)[0] != PyArray_DIMS(%(x_ind)s)[0]) if (PyArray_DIMS(%(x_val)s)[0] != PyArray_DIMS(%(x_ind)s)[0])
...@@ -593,7 +593,7 @@ class UsmmCscDense(gof.Op): ...@@ -593,7 +593,7 @@ class UsmmCscDense(gof.Op):
if (PyArray_DIMS(%(x_ptr)s)[0] != PyArray_DIMS(%(y)s)[0]+1) if (PyArray_DIMS(%(x_ptr)s)[0] != PyArray_DIMS(%(y)s)[0]+1)
{PyErr_SetString(PyExc_NotImplementedError, "x's number of columns doesn't match y's rows"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "x's number of columns doesn't match y's rows"); %(fail)s;}
if (PyArray_DIMS(%(z)s)[0] != ((npy_int32 *)%(x_nrows)s->data)[0] || PyArray_DIMS(%(z)s)[1] != PyArray_DIMS(%(y)s)[1]) if (PyArray_DIMS(%(z)s)[0] != ((npy_int32 *)PyArray_DATA(%(x_nrows)s))[0] || PyArray_DIMS(%(z)s)[1] != PyArray_DIMS(%(y)s)[1])
{PyErr_SetString(PyExc_NotImplementedError, "The dimension of the allocated output doesn't match the correct output size."); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "The dimension of the allocated output doesn't match the correct output size."); %(fail)s;}
if (PyArray_SIZE(%(alpha)s) != 1) if (PyArray_SIZE(%(alpha)s) != 1)
...@@ -621,13 +621,13 @@ class UsmmCscDense(gof.Op): ...@@ -621,13 +621,13 @@ class UsmmCscDense(gof.Op):
Py_INCREF(%(zn)s); Py_INCREF(%(zn)s);
} }
else if (!%(zn)s else if (!%(zn)s
|| (PyArray_DIMS(%(zn)s)[0] != ((npy_int32 *)%(x_nrows)s->data)[0]) || (PyArray_DIMS(%(zn)s)[0] != ((npy_int32 *)PyArray_DATA(%(x_nrows)s))[0])
|| (PyArray_DIMS(%(zn)s)[1] != PyArray_DIMS(%(y)s)[1]) || (PyArray_DIMS(%(zn)s)[1] != PyArray_DIMS(%(y)s)[1])
) )
{ {
{Py_XDECREF(%(zn)s);} {Py_XDECREF(%(zn)s);}
npy_intp dims[] = {0, 0}; npy_intp dims[] = {0, 0};
dims[0] = ((npy_int32 *)%(x_nrows)s->data)[0]; dims[0] = ((npy_int32 *)PyArray_DATA(%(x_nrows)s))[0];
dims[1] = PyArray_DIMS(%(y)s)[1]; dims[1] = PyArray_DIMS(%(y)s)[1];
%(zn)s = (PyArrayObject*) PyArray_SimpleNew(2, dims, %(typenum_zn)s); %(zn)s = (PyArrayObject*) PyArray_SimpleNew(2, dims, %(typenum_zn)s);
} }
...@@ -639,17 +639,17 @@ class UsmmCscDense(gof.Op): ...@@ -639,17 +639,17 @@ class UsmmCscDense(gof.Op):
npy_intp K = PyArray_DIMS(%(y)s)[0]; npy_intp K = PyArray_DIMS(%(y)s)[0];
// pointers to access actual data in the arrays passed as params. // pointers to access actual data in the arrays passed as params.
const dtype_%(x_val)s* __restrict__ Dval = (dtype_%(x_val)s*)%(x_val)s->data; const dtype_%(x_val)s* __restrict__ Dval = (dtype_%(x_val)s*)PyArray_DATA(%(x_val)s);
const npy_int32 * __restrict__ Dind = (npy_int32*)%(x_ind)s->data; const npy_int32 * __restrict__ Dind = (npy_int32*)PyArray_DATA(%(x_ind)s);
const npy_int32 * __restrict__ Dptr = (npy_int32*)%(x_ptr)s->data; const npy_int32 * __restrict__ Dptr = (npy_int32*)PyArray_DATA(%(x_ptr)s);
const dtype_%(alpha)s alpha = ((dtype_%(alpha)s*)%(alpha)s->data)[0]; const dtype_%(alpha)s alpha = ((dtype_%(alpha)s*)PyArray_DATA(%(alpha)s))[0];
npy_intp Sz = %(z)s->strides[1] / PyArray_DESCR(%(z)s)->elsize; npy_intp Sz = PyArray_STRIDES(%(z)s)[1] / PyArray_DESCR(%(z)s)->elsize;
npy_intp Szn = %(zn)s->strides[1] / PyArray_DESCR(%(zn)s)->elsize; npy_intp Szn = PyArray_STRIDES(%(zn)s)[1] / PyArray_DESCR(%(zn)s)->elsize;
npy_intp Sval = %(x_val)s->strides[0] / PyArray_DESCR(%(x_val)s)->elsize; npy_intp Sval = PyArray_STRIDES(%(x_val)s)[0] / PyArray_DESCR(%(x_val)s)->elsize;
npy_intp Sind = %(x_ind)s->strides[0] / PyArray_DESCR(%(x_ind)s)->elsize; npy_intp Sind = PyArray_STRIDES(%(x_ind)s)[0] / PyArray_DESCR(%(x_ind)s)->elsize;
npy_intp Sptr = %(x_ptr)s->strides[0] / PyArray_DESCR(%(x_ptr)s)->elsize; npy_intp Sptr = PyArray_STRIDES(%(x_ptr)s)[0] / PyArray_DESCR(%(x_ptr)s)->elsize;
npy_intp Sy = %(y)s->strides[1] / PyArray_DESCR(%(y)s)->elsize; npy_intp Sy = PyArray_STRIDES(%(y)s)[1] / PyArray_DESCR(%(y)s)->elsize;
if (!(%(inplace)s)) if (!(%(inplace)s))
...@@ -669,14 +669,14 @@ class UsmmCscDense(gof.Op): ...@@ -669,14 +669,14 @@ class UsmmCscDense(gof.Op):
const dtype_%(x_val)s Amk = alpha * Dval[m_idx * Sval]; // actual value at that location const dtype_%(x_val)s Amk = alpha * Dval[m_idx * Sval]; // actual value at that location
dtype_%(y)s* y_row = (dtype_%(y)s*)(%(y)s->data + %(y)s->strides[0] * k); dtype_%(y)s* y_row = (dtype_%(y)s*)(PyArray_BYTES(%(y)s) + PyArray_STRIDES(%(y)s)[0] * k);
// axpy expects pointer to the beginning of memory arrays, // axpy expects pointer to the beginning of memory arrays,
// so when the stride is negative, we need to get the // so when the stride is negative, we need to get the
// last element // last element
if (Sy < 0) if (Sy < 0)
y_row += (K - 1) * Sy; y_row += (K - 1) * Sy;
dtype_%(zn)s* z_row = (dtype_%(zn)s*)(%(zn)s->data + %(zn)s->strides[0] * m); dtype_%(zn)s* z_row = (dtype_%(zn)s*)(PyArray_BYTES(%(zn)s) + PyArray_STRIDES(%(zn)s)[0] * m);
if (Szn < 0) if (Szn < 0)
z_row += (N - 1) * Szn; z_row += (N - 1) * Szn;
...@@ -775,16 +775,16 @@ class CSMGradC(gof.Op): ...@@ -775,16 +775,16 @@ class CSMGradC(gof.Op):
if (PyArray_NDIM(%(b_ind)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b_ind) != 1"); %(fail)s;} if (PyArray_NDIM(%(b_ind)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b_ind) != 1"); %(fail)s;}
if (PyArray_NDIM(%(b_ptr)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b_ptr) != 1"); %(fail)s;} if (PyArray_NDIM(%(b_ptr)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b_ptr) != 1"); %(fail)s;}
if (PyArray_DESCR(%(a_ind)s)->type_num != NPY_INT32) { if (PyArray_TYPE(%(a_ind)s) != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32"); %(fail)s;}
if (PyArray_DESCR(%(a_ptr)s)->type_num != NPY_INT32) if (PyArray_TYPE(%(a_ptr)s) != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32"); %(fail)s;}
if (PyArray_DESCR(%(b_ind)s)->type_num != NPY_INT32) { if (PyArray_TYPE(%(b_ind)s) != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "b_ind dtype not INT32"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "b_ind dtype not INT32"); %(fail)s;}
if (PyArray_DESCR(%(b_ptr)s)->type_num != NPY_INT32) if (PyArray_TYPE(%(b_ptr)s) != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "b_ptr dtype not INT32"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "b_ptr dtype not INT32"); %(fail)s;}
if (PyArray_DIMS(%(a_val)s)[0] != PyArray_DIMS(%(a_ind)s)[0]) if (PyArray_DIMS(%(a_val)s)[0] != PyArray_DIMS(%(a_ind)s)[0])
...@@ -807,28 +807,28 @@ class CSMGradC(gof.Op): ...@@ -807,28 +807,28 @@ class CSMGradC(gof.Op):
{ {
// sparse array has size MxK, dense KxN, output MxN // sparse array has size MxK, dense KxN, output MxN
npy_intp M = PyArray_DIMS(%(a_ptr)s)[0] - 1; npy_intp M = PyArray_DIMS(%(a_ptr)s)[0] - 1;
npy_intp a_dim_0 = ((npy_int32 *)%(a_dim)s->data)[0]; npy_intp a_dim_0 = ((npy_int32 *)PyArray_DATA(%(a_dim)s))[0];
npy_intp a_dim_1 = ((npy_int32 *)%(a_dim)s->data)[1]; npy_intp a_dim_1 = ((npy_int32 *)PyArray_DATA(%(a_dim)s))[1];
npy_intp sp_dim = (M == a_dim_0)?a_dim_1:a_dim_0; npy_intp sp_dim = (M == a_dim_0)?a_dim_1:a_dim_0;
// strides tell you how many bytes to skip to go to next column/row entry // strides tell you how many bytes to skip to go to next column/row entry
npy_intp Sz = %(z)s->strides[0] / PyArray_DESCR(%(z)s)->elsize; npy_intp Sz = PyArray_STRIDES(%(z)s)[0] / PyArray_DESCR(%(z)s)->elsize;
npy_intp Sa_val = %(a_val)s->strides[0] / PyArray_DESCR(%(a_val)s)->elsize; npy_intp Sa_val = PyArray_STRIDES(%(a_val)s)[0] / PyArray_DESCR(%(a_val)s)->elsize;
npy_intp Sa_ind = %(a_ind)s->strides[0] / PyArray_DESCR(%(a_ind)s)->elsize; npy_intp Sa_ind = PyArray_STRIDES(%(a_ind)s)[0] / PyArray_DESCR(%(a_ind)s)->elsize;
npy_intp Sa_ptr = %(a_ptr)s->strides[0] / PyArray_DESCR(%(a_ptr)s)->elsize; npy_intp Sa_ptr = PyArray_STRIDES(%(a_ptr)s)[0] / PyArray_DESCR(%(a_ptr)s)->elsize;
npy_intp Sb_val = %(b_val)s->strides[0] / PyArray_DESCR(%(b_val)s)->elsize; npy_intp Sb_val = PyArray_STRIDES(%(b_val)s)[0] / PyArray_DESCR(%(b_val)s)->elsize;
npy_intp Sb_ind = %(b_ind)s->strides[0] / PyArray_DESCR(%(b_ind)s)->elsize; npy_intp Sb_ind = PyArray_STRIDES(%(b_ind)s)[0] / PyArray_DESCR(%(b_ind)s)->elsize;
npy_intp Sb_ptr = %(b_ptr)s->strides[0] / PyArray_DESCR(%(b_ptr)s)->elsize; npy_intp Sb_ptr = PyArray_STRIDES(%(b_ptr)s)[0] / PyArray_DESCR(%(b_ptr)s)->elsize;
// pointers to access actual data in the arrays passed as params. // pointers to access actual data in the arrays passed as params.
dtype_%(z)s* __restrict__ Dz = (dtype_%(z)s*)%(z)s->data; dtype_%(z)s* __restrict__ Dz = (dtype_%(z)s*)PyArray_DATA(%(z)s);
const dtype_%(a_val)s* __restrict__ Da_val = (dtype_%(a_val)s*)%(a_val)s->data; const dtype_%(a_val)s* __restrict__ Da_val = (dtype_%(a_val)s*)PyArray_DATA(%(a_val)s);
const npy_int32 * __restrict__ Da_ind = (npy_int32*)%(a_ind)s->data; const npy_int32 * __restrict__ Da_ind = (npy_int32*)PyArray_DATA(%(a_ind)s);
const npy_int32 * __restrict__ Da_ptr = (npy_int32*)%(a_ptr)s->data; const npy_int32 * __restrict__ Da_ptr = (npy_int32*)PyArray_DATA(%(a_ptr)s);
const dtype_%(b_val)s* __restrict__ Db_val = (dtype_%(b_val)s*)%(b_val)s->data; const dtype_%(b_val)s* __restrict__ Db_val = (dtype_%(b_val)s*)PyArray_DATA(%(b_val)s);
const npy_int32 * __restrict__ Db_ind = (npy_int32*)%(b_ind)s->data; const npy_int32 * __restrict__ Db_ind = (npy_int32*)PyArray_DATA(%(b_ind)s);
const npy_int32 * __restrict__ Db_ptr = (npy_int32*)%(b_ptr)s->data; const npy_int32 * __restrict__ Db_ptr = (npy_int32*)PyArray_DATA(%(b_ptr)s);
npy_intp nnz = PyArray_DIMS(%(a_ind)s)[0]; npy_intp nnz = PyArray_DIMS(%(a_ind)s)[0];
...@@ -937,10 +937,10 @@ class MulSDCSC(gof.Op): ...@@ -937,10 +937,10 @@ class MulSDCSC(gof.Op):
PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1"); PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1");
%(fail)s;} %(fail)s;}
if( PyArray_DESCR(%(_indices)s)->type_num != NPY_INT32) { if( PyArray_TYPE(%(_indices)s) != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;}
if( PyArray_DESCR(%(_indptr)s)->type_num != NPY_INT32) if( PyArray_TYPE(%(_indptr)s) != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;}
if (!%(_zout)s || if (!%(_zout)s ||
...@@ -949,7 +949,7 @@ class MulSDCSC(gof.Op): ...@@ -949,7 +949,7 @@ class MulSDCSC(gof.Op):
{ {
Py_XDECREF(%(_zout)s); Py_XDECREF(%(_zout)s);
%(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, %(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1,
PyArray_DIMS(%(_indices)s), PyArray_DESCR(%(_b)s)->type_num); PyArray_DIMS(%(_indices)s), PyArray_TYPE(%(_b)s));
if (!%(_zout)s) if (!%(_zout)s)
{ {
PyErr_SetString(PyExc_MemoryError, PyErr_SetString(PyExc_MemoryError,
...@@ -963,13 +963,13 @@ class MulSDCSC(gof.Op): ...@@ -963,13 +963,13 @@ class MulSDCSC(gof.Op):
//TODO: error checking with this //TODO: error checking with this
const npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1; const npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1;
const dtype_%(_data)s * const __restrict__ data = (dtype_%(_data)s*)%(_data)s->data; const dtype_%(_data)s * const __restrict__ data = (dtype_%(_data)s*)PyArray_DATA(%(_data)s);
const npy_int32 * const __restrict__ indptr = (npy_int32 *)%(_indptr)s->data; const npy_int32 * const __restrict__ indptr = (npy_int32 *)PyArray_DATA(%(_indptr)s);
const npy_int32 * const __restrict__ indices = (npy_int32 *)%(_indices)s->data; const npy_int32 * const __restrict__ indices = (npy_int32 *)PyArray_DATA(%(_indices)s);
dtype_%(_zout)s * const __restrict__ zout = (dtype_%(_zout)s*)%(_zout)s->data; dtype_%(_zout)s * const __restrict__ zout = (dtype_%(_zout)s*)PyArray_DATA(%(_zout)s);
const npy_intp Sb = %(_b)s->strides[0]; const npy_intp Sb = PyArray_STRIDES(%(_b)s)[0];
// loop over columns // loop over columns
for (npy_int32 j = 0; j < N; ++j) for (npy_int32 j = 0; j < N; ++j)
...@@ -981,7 +981,7 @@ class MulSDCSC(gof.Op): ...@@ -981,7 +981,7 @@ class MulSDCSC(gof.Op):
npy_int32 i = indices[i_idx]; npy_int32 i = indices[i_idx];
// extract i-th row of dense matrix // extract i-th row of dense matrix
const dtype_%(_b)s* __restrict__ b_row = (dtype_%(_b)s*)(%(_b)s->data + Sb * i); const dtype_%(_b)s* __restrict__ b_row = (dtype_%(_b)s*)(PyArray_BYTES(%(_b)s) + Sb * i);
// write resulting gradient to sparse output // write resulting gradient to sparse output
zout[i_idx] = data[i_idx] * b_row[j]; zout[i_idx] = data[i_idx] * b_row[j];
...@@ -1053,10 +1053,10 @@ class MulSDCSR(gof.Op): ...@@ -1053,10 +1053,10 @@ class MulSDCSR(gof.Op):
PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1"); PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1");
%(fail)s;} %(fail)s;}
if( PyArray_DESCR(%(_indices)s)->type_num != NPY_INT32) { if( PyArray_TYPE(%(_indices)s) != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;}
if( PyArray_DESCR(%(_indptr)s)->type_num != NPY_INT32) if( PyArray_TYPE(%(_indptr)s) != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;}
if (!%(_zout)s || if (!%(_zout)s ||
...@@ -1065,7 +1065,7 @@ class MulSDCSR(gof.Op): ...@@ -1065,7 +1065,7 @@ class MulSDCSR(gof.Op):
{ {
Py_XDECREF(%(_zout)s); Py_XDECREF(%(_zout)s);
%(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, %(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1,
PyArray_DIMS(%(_indices)s), PyArray_DESCR(%(_b)s)->type_num); PyArray_DIMS(%(_indices)s), PyArray_TYPE(%(_b)s));
if (!%(_zout)s) if (!%(_zout)s)
{ {
PyErr_SetString(PyExc_MemoryError, PyErr_SetString(PyExc_MemoryError,
...@@ -1079,19 +1079,19 @@ class MulSDCSR(gof.Op): ...@@ -1079,19 +1079,19 @@ class MulSDCSR(gof.Op):
//TODO: error checking with this //TODO: error checking with this
const npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1; const npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1;
const dtype_%(_data)s * const __restrict__ data = (dtype_%(_data)s*)%(_data)s->data; const dtype_%(_data)s * const __restrict__ data = (dtype_%(_data)s*)PyArray_DATA(%(_data)s);
const npy_int32 * const __restrict__ indptr = (npy_int32 *)%(_indptr)s->data; const npy_int32 * const __restrict__ indptr = (npy_int32 *)PyArray_DATA(%(_indptr)s);
const npy_int32 * const __restrict__ indices = (npy_int32 *)%(_indices)s->data; const npy_int32 * const __restrict__ indices = (npy_int32 *)PyArray_DATA(%(_indices)s);
dtype_%(_zout)s * const __restrict__ zout = (dtype_%(_zout)s*)%(_zout)s->data; dtype_%(_zout)s * const __restrict__ zout = (dtype_%(_zout)s*)PyArray_DATA(%(_zout)s);
const npy_intp Sb = %(_b)s->strides[0]; const npy_intp Sb = PyArray_STRIDES(%(_b)s)[0];
// loop over columns // loop over columns
for (npy_int32 j = 0; j < N; ++j) for (npy_int32 j = 0; j < N; ++j)
{ {
// extract i-th row of dense matrix // extract i-th row of dense matrix
const dtype_%(_b)s* __restrict__ b_row = (dtype_%(_b)s*)(%(_b)s->data + Sb * j); const dtype_%(_b)s* __restrict__ b_row = (dtype_%(_b)s*)(PyArray_BYTES(%(_b)s) + Sb * j);
// for each non-null value in the sparse column // for each non-null value in the sparse column
for (npy_int32 i_idx = indptr[j]; i_idx < indptr[j+1]; ++i_idx) for (npy_int32 i_idx = indptr[j]; i_idx < indptr[j+1]; ++i_idx)
...@@ -1209,10 +1209,10 @@ class MulSVCSR(gof.Op): ...@@ -1209,10 +1209,10 @@ class MulSVCSR(gof.Op):
%(fail)s; %(fail)s;
} }
if( PyArray_DESCR(%(_indices)s)->type_num != NPY_INT32) { if( PyArray_TYPE(%(_indices)s) != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;}
if( PyArray_DESCR(%(_indptr)s)->type_num != NPY_INT32) if( PyArray_TYPE(%(_indptr)s) != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;}
if (!%(_zout)s if (!%(_zout)s
...@@ -1221,7 +1221,7 @@ class MulSVCSR(gof.Op): ...@@ -1221,7 +1221,7 @@ class MulSVCSR(gof.Op):
{ {
Py_XDECREF(%(_zout)s); Py_XDECREF(%(_zout)s);
%(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, %(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1,
PyArray_DIMS(%(_indices)s), PyArray_DESCR(%(_b)s)->type_num); PyArray_DIMS(%(_indices)s), PyArray_TYPE(%(_b)s));
} }
{ //makes it compile even though labels jump over variable definitions. { //makes it compile even though labels jump over variable definitions.
...@@ -1229,15 +1229,15 @@ class MulSVCSR(gof.Op): ...@@ -1229,15 +1229,15 @@ class MulSVCSR(gof.Op):
//TODO: error checking with this //TODO: error checking with this
const npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1; const npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1;
const dtype_%(_data)s * const __restrict__ data = (dtype_%(_data)s*)%(_data)s->data; const dtype_%(_data)s * const __restrict__ data = (dtype_%(_data)s*)PyArray_DATA(%(_data)s);
const npy_int32 * const __restrict__ indptr = (npy_int32 *)%(_indptr)s->data; const npy_int32 * const __restrict__ indptr = (npy_int32 *)PyArray_DATA(%(_indptr)s);
const npy_int32 * const __restrict__ indices = (npy_int32 *)%(_indices)s->data; const npy_int32 * const __restrict__ indices = (npy_int32 *)PyArray_DATA(%(_indices)s);
const dtype_%(_b)s* __restrict__ Db = (dtype_%(_b)s*)%(_b)s->data; const dtype_%(_b)s* __restrict__ Db = (dtype_%(_b)s*)PyArray_DATA(%(_b)s);
dtype_%(_zout)s * const __restrict__ zout = (dtype_%(_zout)s*)%(_zout)s->data; dtype_%(_zout)s * const __restrict__ zout = (dtype_%(_zout)s*)PyArray_DATA(%(_zout)s);
const npy_intp Sb = %(_b)s->strides[0] / PyArray_DESCR(%(_b)s)->elsize; const npy_intp Sb = PyArray_STRIDES(%(_b)s)[0] / PyArray_DESCR(%(_b)s)->elsize;
// loop over rows // loop over rows
for (npy_int32 j = 0; j < N; ++j) for (npy_int32 j = 0; j < N; ++j)
...@@ -1359,10 +1359,10 @@ class StructuredAddSVCSR(gof.Op): ...@@ -1359,10 +1359,10 @@ class StructuredAddSVCSR(gof.Op):
%(fail)s; %(fail)s;
} }
if( PyArray_DESCR(%(_indices)s)->type_num != NPY_INT32) { if( PyArray_TYPE(%(_indices)s) != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;}
if( PyArray_DESCR(%(_indptr)s)->type_num != NPY_INT32) if( PyArray_TYPE(%(_indptr)s) != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;}
if (!%(_zout)s if (!%(_zout)s
...@@ -1371,7 +1371,7 @@ class StructuredAddSVCSR(gof.Op): ...@@ -1371,7 +1371,7 @@ class StructuredAddSVCSR(gof.Op):
{ {
Py_XDECREF(%(_zout)s); Py_XDECREF(%(_zout)s);
%(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, %(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1,
PyArray_DIMS(%(_indices)s), PyArray_DESCR(%(_b)s)->type_num); PyArray_DIMS(%(_indices)s), PyArray_TYPE(%(_b)s));
if (!%(_zout)s) if (!%(_zout)s)
{ {
PyErr_SetString(PyExc_MemoryError, PyErr_SetString(PyExc_MemoryError,
...@@ -1385,15 +1385,15 @@ class StructuredAddSVCSR(gof.Op): ...@@ -1385,15 +1385,15 @@ class StructuredAddSVCSR(gof.Op):
//TODO: error checking with this //TODO: error checking with this
const npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1; const npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1;
const dtype_%(_data)s * const __restrict__ data = (dtype_%(_data)s*)%(_data)s->data; const dtype_%(_data)s * const __restrict__ data = (dtype_%(_data)s*)PyArray_DATA(%(_data)s);
const npy_int32 * const __restrict__ indptr = (npy_int32 *)%(_indptr)s->data; const npy_int32 * const __restrict__ indptr = (npy_int32 *)PyArray_DATA(%(_indptr)s);
const npy_int32 * const __restrict__ indices = (npy_int32 *)%(_indices)s->data; const npy_int32 * const __restrict__ indices = (npy_int32 *)PyArray_DATA(%(_indices)s);
const dtype_%(_b)s* __restrict__ Db = (dtype_%(_b)s*)%(_b)s->data; const dtype_%(_b)s* __restrict__ Db = (dtype_%(_b)s*)PyArray_DATA(%(_b)s);
dtype_%(_zout)s * const __restrict__ zout = (dtype_%(_zout)s*)%(_zout)s->data; dtype_%(_zout)s * const __restrict__ zout = (dtype_%(_zout)s*)PyArray_DATA(%(_zout)s);
const npy_intp Sb = %(_b)s->strides[0] / PyArray_DESCR(%(_b)s)->elsize; const npy_intp Sb = PyArray_STRIDES(%(_b)s)[0] / PyArray_DESCR(%(_b)s)->elsize;
// loop over columns // loop over columns
for (npy_int32 j = 0; j < N; ++j) for (npy_int32 j = 0; j < N; ++j)
...@@ -1575,17 +1575,17 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(x) != 2"); %(fail)s;} ...@@ -1575,17 +1575,17 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(x) != 2"); %(fail)s;}
if (PyArray_NDIM(%(y)s) != 2) { if (PyArray_NDIM(%(y)s) != 2) {
PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
if (PyArray_DESCR(%(x)s)->type_num != %(typenum_x)s) { if (PyArray_TYPE(%(x)s) != %(typenum_x)s) {
PyErr_SetString(PyExc_NotImplementedError, PyErr_SetString(PyExc_NotImplementedError,
"Invalid type for x"); "Invalid type for x");
%(fail)s;} %(fail)s;}
if (PyArray_DESCR(%(y)s)->type_num != %(typenum_y)s) { if (PyArray_TYPE(%(y)s) != %(typenum_y)s) {
PyErr_SetString(PyExc_NotImplementedError, PyErr_SetString(PyExc_NotImplementedError,
"Invalid type for y"); "Invalid type for y");
%(fail)s;} %(fail)s;}
if (PyArray_DESCR(%(p_data)s)->type_num != %(typenum_p)s) { if (PyArray_TYPE(%(p_data)s) != %(typenum_p)s) {
PyErr_SetString(PyExc_NotImplementedError, PyErr_SetString(PyExc_NotImplementedError,
"Invalid type for pattern"); "Invalid type for pattern");
%(fail)s;} %(fail)s;}
...@@ -1595,7 +1595,7 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;} ...@@ -1595,7 +1595,7 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
"x's number of columns doesn't match y's rows! Note: sampling_dot is different from dot because y is assumed to be transposed."); "x's number of columns doesn't match y's rows! Note: sampling_dot is different from dot because y is assumed to be transposed.");
%(fail)s;} %(fail)s;}
if (PyArray_DIMS(%(y)s)[0] != ((npy_int32 *)%(p_ncols)s->data)[0] || if (PyArray_DIMS(%(y)s)[0] != ((npy_int32 *)PyArray_DATA(%(p_ncols)s))[0] ||
PyArray_DIMS(%(x)s)[0] != (PyArray_DIMS(%(p_ptr)s)[0] - 1)) PyArray_DIMS(%(x)s)[0] != (PyArray_DIMS(%(p_ptr)s)[0] - 1))
{PyErr_SetString(PyExc_NotImplementedError, {PyErr_SetString(PyExc_NotImplementedError,
"The dimension of the pattern and the output must match"); %(fail)s;} "The dimension of the pattern and the output must match"); %(fail)s;}
...@@ -1603,7 +1603,7 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;} ...@@ -1603,7 +1603,7 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
// Allocate output // Allocate output
if (!%(z_data)s if (!%(z_data)s
|| (PyArray_DIMS(%(z_data)s)[0] != PyArray_DIMS(%(p_data)s)[0]) || (PyArray_DIMS(%(z_data)s)[0] != PyArray_DIMS(%(p_data)s)[0])
|| (PyArray_DESCR(%(z_data)s)->type_num != %(typenum_zd)s) || (PyArray_TYPE(%(z_data)s) != %(typenum_zd)s)
|| !(PyArray_ISCONTIGUOUS(%(z_data)s))) || !(PyArray_ISCONTIGUOUS(%(z_data)s)))
{ {
{Py_XDECREF(%(z_data)s);} {Py_XDECREF(%(z_data)s);}
...@@ -1614,7 +1614,7 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;} ...@@ -1614,7 +1614,7 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
} }
if (!%(z_ind)s if (!%(z_ind)s
|| (PyArray_DIMS(%(z_ind)s)[0] != PyArray_DIMS(%(p_ind)s)[0]) || (PyArray_DIMS(%(z_ind)s)[0] != PyArray_DIMS(%(p_ind)s)[0])
|| (PyArray_DESCR(%(z_ind)s)->type_num != %(typenum_zi)s) || (PyArray_TYPE(%(z_ind)s) != %(typenum_zi)s)
|| !(PyArray_ISCONTIGUOUS(%(z_ind)s))) || !(PyArray_ISCONTIGUOUS(%(z_ind)s)))
{ {
{Py_XDECREF(%(z_ind)s);} {Py_XDECREF(%(z_ind)s);}
...@@ -1625,7 +1625,7 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;} ...@@ -1625,7 +1625,7 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
} }
if (!%(z_ptr)s if (!%(z_ptr)s
|| (PyArray_DIMS(%(z_ptr)s)[0] != PyArray_DIMS(%(p_ptr)s)[0]) || (PyArray_DIMS(%(z_ptr)s)[0] != PyArray_DIMS(%(p_ptr)s)[0])
|| (PyArray_DESCR(%(z_ptr)s)->type_num != %(typenum_zp)s) || (PyArray_TYPE(%(z_ptr)s) != %(typenum_zp)s)
|| !(PyArray_ISCONTIGUOUS(%(z_ptr)s))) || !(PyArray_ISCONTIGUOUS(%(z_ptr)s)))
{ {
{Py_XDECREF(%(z_ptr)s);} {Py_XDECREF(%(z_ptr)s);}
...@@ -1642,23 +1642,23 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;} ...@@ -1642,23 +1642,23 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
npy_intp K = PyArray_DIMS(%(y)s)[1]; npy_intp K = PyArray_DIMS(%(y)s)[1];
// pointers to access actual data in the arrays passed as params. // pointers to access actual data in the arrays passed as params.
const dtype_%(x)s* __restrict__ Dx = (dtype_%(x)s*)%(x)s->data; const dtype_%(x)s* __restrict__ Dx = (dtype_%(x)s*)PyArray_DATA(%(x)s);
const dtype_%(y)s* __restrict__ Dy = (dtype_%(y)s*)%(y)s->data; const dtype_%(y)s* __restrict__ Dy = (dtype_%(y)s*)PyArray_DATA(%(y)s);
const dtype_%(p_data)s* __restrict__ Dpd = (dtype_%(p_data)s*)%(p_data)s->data; const dtype_%(p_data)s* __restrict__ Dpd = (dtype_%(p_data)s*)PyArray_DATA(%(p_data)s);
const dtype_%(p_ind)s* __restrict__ Dpi = (dtype_%(p_ind)s*)%(p_ind)s->data; const dtype_%(p_ind)s* __restrict__ Dpi = (dtype_%(p_ind)s*)PyArray_DATA(%(p_ind)s);
const dtype_%(p_ptr)s* __restrict__ Dpp = (dtype_%(p_ptr)s*)%(p_ptr)s->data; const dtype_%(p_ptr)s* __restrict__ Dpp = (dtype_%(p_ptr)s*)PyArray_DATA(%(p_ptr)s);
dtype_%(z_data)s* __restrict__ Dzd = (dtype_%(z_data)s*)%(z_data)s->data; dtype_%(z_data)s* __restrict__ Dzd = (dtype_%(z_data)s*)PyArray_DATA(%(z_data)s);
dtype_%(z_ind)s* __restrict__ Dzi = (dtype_%(z_ind)s*)%(z_ind)s->data; dtype_%(z_ind)s* __restrict__ Dzi = (dtype_%(z_ind)s*)PyArray_DATA(%(z_ind)s);
dtype_%(z_ptr)s* __restrict__ Dzp = (dtype_%(z_ptr)s*)%(z_ptr)s->data; dtype_%(z_ptr)s* __restrict__ Dzp = (dtype_%(z_ptr)s*)PyArray_DATA(%(z_ptr)s);
const npy_intp Sdx = %(x)s->strides[1]/PyArray_DESCR(%(x)s)->elsize; const npy_intp Sdx = PyArray_STRIDES(%(x)s)[1]/PyArray_DESCR(%(x)s)->elsize;
const npy_intp Sdy = %(y)s->strides[1]/PyArray_DESCR(%(y)s)->elsize; const npy_intp Sdy = PyArray_STRIDES(%(y)s)[1]/PyArray_DESCR(%(y)s)->elsize;
const npy_intp Sdpd = %(p_data)s->strides[0] / PyArray_DESCR(%(p_data)s)->elsize; const npy_intp Sdpd = PyArray_STRIDES(%(p_data)s)[0] / PyArray_DESCR(%(p_data)s)->elsize;
const npy_intp Sdpi = %(p_ind)s->strides[0] / PyArray_DESCR(%(p_ind)s)->elsize; const npy_intp Sdpi = PyArray_STRIDES(%(p_ind)s)[0] / PyArray_DESCR(%(p_ind)s)->elsize;
const npy_intp Sdpp = %(p_ptr)s->strides[0] / PyArray_DESCR(%(p_ptr)s)->elsize; const npy_intp Sdpp = PyArray_STRIDES(%(p_ptr)s)[0] / PyArray_DESCR(%(p_ptr)s)->elsize;
const npy_intp Sdzd = %(z_data)s->strides[0] / PyArray_DESCR(%(z_data)s)->elsize; const npy_intp Sdzd = PyArray_STRIDES(%(z_data)s)[0] / PyArray_DESCR(%(z_data)s)->elsize;
const npy_intp Sdzi = %(z_ind)s->strides[0] / PyArray_DESCR(%(z_ind)s)->elsize; const npy_intp Sdzi = PyArray_STRIDES(%(z_ind)s)[0] / PyArray_DESCR(%(z_ind)s)->elsize;
const npy_intp Sdzp = %(z_ptr)s->strides[0] / PyArray_DESCR(%(z_ptr)s)->elsize; const npy_intp Sdzp = PyArray_STRIDES(%(z_ptr)s)[0] / PyArray_DESCR(%(z_ptr)s)->elsize;
memcpy(Dzi, Dpi, PyArray_DIMS(%(p_ind)s)[0]*sizeof(dtype_%(p_ind)s)); memcpy(Dzi, Dpi, PyArray_DIMS(%(p_ind)s)[0]*sizeof(dtype_%(p_ind)s));
memcpy(Dzp, Dpp, PyArray_DIMS(%(p_ptr)s)[0]*sizeof(dtype_%(p_ptr)s)); memcpy(Dzp, Dpp, PyArray_DIMS(%(p_ptr)s)[0]*sizeof(dtype_%(p_ptr)s));
...@@ -1667,9 +1667,9 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;} ...@@ -1667,9 +1667,9 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
for (npy_int32 n_idx = Dpp[m * Sdpp]; n_idx < Dpp[(m+1)*Sdpp]; ++n_idx) { for (npy_int32 n_idx = Dpp[m * Sdpp]; n_idx < Dpp[(m+1)*Sdpp]; ++n_idx) {
const npy_int32 n = Dpi[n_idx * Sdpi]; // row index of non-null value for column K const npy_int32 n = Dpi[n_idx * Sdpi]; // row index of non-null value for column K
const dtype_%(x)s* x_row = (dtype_%(x)s*)(%(x)s->data + %(x)s->strides[0] * m); const dtype_%(x)s* x_row = (dtype_%(x)s*)(PyArray_BYTES(%(x)s) + PyArray_STRIDES(%(x)s)[0] * m);
const dtype_%(y)s* y_col = (dtype_%(y)s*)(%(y)s->data + %(y)s->strides[0] * n); const dtype_%(y)s* y_col = (dtype_%(y)s*)(PyArray_BYTES(%(y)s) + PyArray_STRIDES(%(y)s)[0] * n);
Dzd[n_idx * Sdzd] = Dpd[n_idx * Sdpd] * %(cdot)s((int*)&K, (const %(conv_type)s*)x_row, (int*)&Sdx, (const %(conv_type)s*)y_col, (int*)&Sdy); Dzd[n_idx * Sdzd] = Dpd[n_idx * Sdpd] * %(cdot)s((int*)&K, (const %(conv_type)s*)x_row, (int*)&Sdx, (const %(conv_type)s*)y_col, (int*)&Sdy);
} }
......
...@@ -3905,7 +3905,7 @@ class Reshape(Op): ...@@ -3905,7 +3905,7 @@ class Reshape(Op):
} }
Py_XDECREF(%(z)s); Py_XDECREF(%(z)s);
%(z)s = (PyArrayObject *) PyArray_Newshape(%(x)s, &newshape, %(z)s = (PyArrayObject *) PyArray_Newshape(%(x)s, &newshape,
PyArray_CORDER); NPY_CORDER);
if (!%(z)s) if (!%(z)s)
{ {
//The error message should have been set by PyArray_Newshape //The error message should have been set by PyArray_Newshape
......
...@@ -336,7 +336,7 @@ class DimShuffle(Op): ...@@ -336,7 +336,7 @@ class DimShuffle(Op):
'PyArray_UpdateFlags(%(res)s, NPY_ARRAY_UPDATE_ALL)', 'PyArray_UpdateFlags(%(res)s, NPY_ARRAY_UPDATE_ALL)',
#we are making a view in both inplace and non-inplace cases #we are making a view in both inplace and non-inplace cases
""" """
#if NPY_VERSION <= 0x01000009 #if NPY_API_VERSION < 0x00000007
PyArray_BASE(%(res)s) = (PyObject*)%(basename)s; PyArray_BASE(%(res)s) = (PyObject*)%(basename)s;
#else #else
PyArray_SetBaseObject(%(res)s, (PyObject*)%(basename)s); PyArray_SetBaseObject(%(res)s, (PyObject*)%(basename)s);
......
...@@ -118,14 +118,14 @@ class SoftmaxWithBias(gof.Op): ...@@ -118,14 +118,14 @@ class SoftmaxWithBias(gof.Op):
PyErr_SetString(PyExc_ValueError, "b not 1d tensor"); PyErr_SetString(PyExc_ValueError, "b not 1d tensor");
%(fail)s; %(fail)s;
} }
if ((PyArray_DESCR(%(x)s)->type_num != NPY_DOUBLE) && if ((PyArray_TYPE(%(x)s) != NPY_DOUBLE) &&
(PyArray_DESCR(%(x)s)->type_num != NPY_FLOAT)) (PyArray_TYPE(%(x)s) != NPY_FLOAT))
{ {
PyErr_SetString(PyExc_TypeError, "not a float"); PyErr_SetString(PyExc_TypeError, "not a float");
%(fail)s; %(fail)s;
} }
if ((PyArray_DESCR(%(b)s)->type_num != NPY_DOUBLE) && if ((PyArray_TYPE(%(b)s) != NPY_DOUBLE) &&
(PyArray_DESCR(%(b)s)->type_num != NPY_FLOAT)) (PyArray_TYPE(%(b)s) != NPY_FLOAT))
{ {
PyErr_SetString(PyExc_TypeError, "b not float"); PyErr_SetString(PyExc_TypeError, "b not float");
%(fail)s; %(fail)s;
...@@ -264,15 +264,15 @@ class SoftmaxGrad(gof.Op): ...@@ -264,15 +264,15 @@ class SoftmaxGrad(gof.Op):
dy, sm = inp dy, sm = inp
dx, = out dx, = out
return ''' return '''
if ((PyArray_DESCR(%(dy)s)->type_num != NPY_DOUBLE) && if ((PyArray_TYPE(%(dy)s) != NPY_DOUBLE) &&
(PyArray_DESCR(%(dy)s)->type_num != NPY_FLOAT)) (PyArray_TYPE(%(dy)s) != NPY_FLOAT))
{ {
PyErr_SetString(PyExc_TypeError, PyErr_SetString(PyExc_TypeError,
"types should be float or float64"); "types should be float or float64");
%(fail)s; %(fail)s;
} }
if ((PyArray_DESCR(%(sm)s)->type_num != NPY_DOUBLE) && if ((PyArray_TYPE(%(sm)s) != NPY_DOUBLE) &&
(PyArray_DESCR(%(sm)s)->type_num != NPY_FLOAT)) (PyArray_TYPE(%(sm)s) != NPY_FLOAT))
{ {
PyErr_SetString(PyExc_TypeError, PyErr_SetString(PyExc_TypeError,
"types should be float or float64"); "types should be float or float64");
...@@ -395,23 +395,23 @@ class Softmax(gof.Op): ...@@ -395,23 +395,23 @@ class Softmax(gof.Op):
#TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1] #TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1]
init_decl = """ init_decl = """
npy_intp* Nx = %(x)s->dimensions; npy_intp* Nx = PyArray_DIMS(%(x)s);
if (%(x)s->nd != 2) if (PyArray_NDIM(%(x)s) != 2)
{ {
PyErr_SetString(PyExc_ValueError, "not a 2d tensor"); PyErr_SetString(PyExc_ValueError, "not a 2d tensor");
%(fail)s; %(fail)s;
} }
if ((%(x)s->descr->type_num != PyArray_DOUBLE) && if ((PyArray_TYPE(%(x)s) != NPY_DOUBLE) &&
(%(x)s->descr->type_num != PyArray_FLOAT)) (PyArray_TYPE(%(x)s) != NPY_FLOAT))
{ {
PyErr_SetString(PyExc_TypeError, "not a float"); PyErr_SetString(PyExc_TypeError, "not a float");
%(fail)s; %(fail)s;
} }
if ((NULL == %(sm)s) if ((NULL == %(sm)s)
|| (%(sm)s->dimensions[0] != %(x)s->dimensions[0]) || (PyArray_DIMS(%(sm)s)[0] != PyArray_DIMS(%(x)s)[0])
|| (%(sm)s->dimensions[1] != %(x)s->dimensions[1])) || (PyArray_DIMS(%(sm)s)[1] != PyArray_DIMS(%(x)s)[1]))
{ {
if (NULL != %(sm)s) Py_XDECREF(%(sm)s); if (NULL != %(sm)s) Py_XDECREF(%(sm)s);
%(sm)s = (PyArrayObject*)PyArray_SimpleNew(2, PyArray_DIMS(%(x)s), %(sm)s = (PyArrayObject*)PyArray_SimpleNew(2, PyArray_DIMS(%(x)s),
...@@ -431,13 +431,13 @@ class Softmax(gof.Op): ...@@ -431,13 +431,13 @@ class Softmax(gof.Op):
double sum = 0.0; double sum = 0.0;
bool discount_max = false; bool discount_max = false;
const dtype_%(x)s* __restrict__ x_i = (dtype_%(x)s*)(%(x)s->data + %(x)s->strides[0] * i); const dtype_%(x)s* __restrict__ x_i = (dtype_%(x)s*)(PyArray_BYTES(%(x)s) + PyArray_STRIDES(%(x)s)[0] * i);
dtype_%(sm) s* __restrict__ sm_i = (dtype_%(sm)s*)(%(sm)s->data + %(sm)s->strides[0] * i); dtype_%(sm) s* __restrict__ sm_i = (dtype_%(sm)s*)(PyArray_BYTES(%(sm)s) + PyArray_STRIDES(%(sm)s)[0] * i);
""" """
inside_row_loop = """ inside_row_loop = """
npy_intp Sx = %(x)s->strides[1]/sizeof(dtype_%(x)s); npy_intp Sx = PyArray_STRIDES(%(x)s)[1]/sizeof(dtype_%(x)s);
npy_intp Ssm = %(sm)s->strides[1]/sizeof(dtype_%(sm)s); npy_intp Ssm = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s);
size_t row_max_j=0; size_t row_max_j=0;
dtype_%(sm)s row_max = x_i[0]; dtype_%(sm)s row_max = x_i[0];
...@@ -1018,15 +1018,15 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op): ...@@ -1018,15 +1018,15 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
y_idx_type = node.inputs[2].type.dtype_specs()[1] y_idx_type = node.inputs[2].type.dtype_specs()[1]
return """ return """
if ((PyArray_DESCR(%(dnll)s)->type_num != NPY_DOUBLE) && if ((PyArray_TYPE(%(dnll)s) != NPY_DOUBLE) &&
(PyArray_DESCR(%(dnll)s)->type_num != NPY_FLOAT)) (PyArray_TYPE(%(dnll)s) != NPY_FLOAT))
{ {
PyErr_SetString(PyExc_TypeError, PyErr_SetString(PyExc_TypeError,
"dnll type should be float32 or float64"); "dnll type should be float32 or float64");
%(fail)s; %(fail)s;
} }
if ((PyArray_DESCR(%(sm)s)->type_num != NPY_DOUBLE) && if ((PyArray_TYPE(%(sm)s) != NPY_DOUBLE) &&
(PyArray_DESCR(%(sm)s)->type_num != NPY_FLOAT)) (PyArray_TYPE(%(sm)s) != NPY_FLOAT))
{ {
PyErr_SetString(PyExc_TypeError, PyErr_SetString(PyExc_TypeError,
"sm type should be float32 or float64"); "sm type should be float32 or float64");
......
...@@ -9,14 +9,13 @@ _logger = logging.getLogger("theano.tensor.subtensor") ...@@ -9,14 +9,13 @@ _logger = logging.getLogger("theano.tensor.subtensor")
import numpy import numpy
import theano import theano
from theano.compat.six import StringIO
from theano.gradient import DisconnectedType from theano.gradient import DisconnectedType
from theano import gof from theano import gof
from theano.gof import Apply, Constant, hashtype, Op, Type, MethodNotDefined from theano.gof import Apply, Constant, hashtype, Op, Type, MethodNotDefined
from theano.gof.python25 import maxsize from theano.gof.python25 import maxsize
from theano.printing import pprint from theano.printing import pprint
from theano import scalar as scal from theano import scalar as scal
from theano.tensor.basic import (addbroadcast, clip, sum, exp, from theano.tensor.basic import (addbroadcast, clip,
ARange, TensorType) ARange, TensorType)
from theano.tensor.elemwise import DimShuffle from theano.tensor.elemwise import DimShuffle
from theano.tensor.type_other import NoneConst, SliceType, make_slice from theano.tensor.type_other import NoneConst, SliceType, make_slice
...@@ -532,58 +531,30 @@ class Subtensor(Op): ...@@ -532,58 +531,30 @@ class Subtensor(Op):
""" """
return { return {
"c_prefix": "PyArray", "c_prefix": "PyArray",
"update_flags": ("PyArray_UpdateFlags(%(view_name)s," "strides_mul": 1,
" NPY_ARRAY_C_CONTIGUOUS|" }
"NPY_ARRAY_F_CONTIGUOUS);"),
"set_data": "PyArray_set_data",
"set_dim": "PyArray_set_dim",
"set_stride": "PyArray_set_stride",
"strides_mul": 1,
"view_name": "xview"}
@staticmethod @staticmethod
def helper_c_code(node, name, inputs, outputs, sub, idx_list, def helper_c_code(node, name, inputs, outputs, sub, idx_list, view_ndim,
c_prefix=None, c_prefix=None,
update_flags=None,
set_data=None,
set_dim=None,
set_stride=None,
strides_mul=None, strides_mul=None,
view_name=None
): ):
""" """
The parameters c_prefix, update_flags, set_data, set_dim, The parameters c_prefix are there to allow reusing this
set_stride and strides_mul are there to allow reusing this
function on PyArray and CudaNdarray object. function on PyArray and CudaNdarray object.
This fct take as input the x,
""" """
default_args = Subtensor.default_helper_c_code_args() default_args = Subtensor.default_helper_c_code_args()
if update_flags is None:
update_flags = default_args['update_flags']
if set_data is None:
set_data = default_args['set_data']
if set_dim is None:
set_dim = default_args['set_dim']
if set_stride is None:
set_stride = default_args['set_stride']
if strides_mul is None: if strides_mul is None:
strides_mul = default_args['strides_mul'] strides_mul = default_args['strides_mul']
if c_prefix is None: if c_prefix is None:
c_prefix = default_args['c_prefix'] c_prefix = default_args['c_prefix']
if view_name is None:
view_name = default_args['view_name']
#update_flags may depend on view_name
update_flags = update_flags % locals()
# #
# two arrays are created in C code: # two arrays are created in C code:
# is_slice: len == ndim, 0 means int, 1 means slice # is_slice: len == ndim, 0 means int, 1 means slice
...@@ -657,12 +628,7 @@ class Subtensor(Op): ...@@ -657,12 +628,7 @@ class Subtensor(Op):
x, = inputs[:1] x, = inputs[:1]
z, = outputs z, = outputs
xview = view_name
rval = """ rval = """
#define PyArray_set_dim(obj, idx, d) PyArray_DIMS(obj)[idx]=d
#define PyArray_set_stride(obj, idx, d) PyArray_STRIDES(obj)[idx]=d
#define PyArray_set_data(obj, ptr, base) PyArray_BYTES(obj)=ptr
// The subtensor is created by iterating over the dimensions // The subtensor is created by iterating over the dimensions
// and updating stride, shape, and data pointers // and updating stride, shape, and data pointers
...@@ -674,32 +640,10 @@ class Subtensor(Op): ...@@ -674,32 +640,10 @@ class Subtensor(Op):
int inner_ii = 0; // the current dimension of zview int inner_ii = 0; // the current dimension of zview
int outer_ii = 0; // current dimension of z int outer_ii = 0; // current dimension of z
char* ptr = (char*) %(c_prefix)s_BYTES(%(xview)s); // Argument of the view
ssize_t xview_offset = 0;
if ((%(c_prefix)s_DIMS(%(xview)s) == %(c_prefix)s_DIMS(%(x)s)) ssize_t xview_dims[%(view_ndim)s];
&& (%(c_prefix)s_DIMS(%(x)s) != NULL)) ssize_t xview_strides[%(view_ndim)s];
{
PyErr_Format(PyExc_ValueError, "x and %(xview)s"
"(with %%d dims) have the same dimensions"
" pointers: %%p and %%p",
%(c_prefix)s_NDIM(%(x)s),
%(c_prefix)s_DIMS(%(xview)s),
%(c_prefix)s_DIMS(%(x)s));
Py_XDECREF(%(xview)s);
%(fail)s;
}
if (%(c_prefix)s_STRIDES(%(xview)s) == %(c_prefix)s_STRIDES(%(x)s)
&& (%(c_prefix)s_DIMS(%(x)s) != NULL))
{
PyErr_Format(PyExc_ValueError, "x and %(xview)s"
"(with %%d dims) have the same strides"
" pointers: %%p and %%p",
%(c_prefix)s_NDIM(%(x)s),
%(c_prefix)s_STRIDES(%(xview)s),
%(c_prefix)s_STRIDES(%(x)s));
Py_XDECREF(%(xview)s);
%(fail)s;
}
for (; outer_ii < %(len_is_slice)s; ++outer_ii) for (; outer_ii < %(len_is_slice)s; ++outer_ii)
{ {
...@@ -719,10 +663,8 @@ class Subtensor(Op): ...@@ -719,10 +663,8 @@ class Subtensor(Op):
// PySlice_GetIndicesEx in python source // PySlice_GetIndicesEx in python source
if (!step) if (!step)
{ {
Py_DECREF(%(xview)s);
PyErr_Format(PyExc_ValueError, PyErr_Format(PyExc_ValueError,
"slice step cannot be zero"); "slice step cannot be zero");
Py_XDECREF(%(xview)s);
%(fail)s; %(fail)s;
} }
...@@ -771,11 +713,10 @@ class Subtensor(Op): ...@@ -771,11 +713,10 @@ class Subtensor(Op):
assert (slicelength <= length); assert (slicelength <= length);
ptr += %(c_prefix)s_STRIDES(%(x)s)[outer_ii] * start * xview_offset += %(c_prefix)s_STRIDES(%(x)s)[outer_ii] * start *
%(strides_mul)s; %(strides_mul)s;
%(set_dim)s(%(xview)s, inner_ii, slicelength); xview_dims[inner_ii] = slicelength;
%(set_stride)s(%(xview)s, inner_ii, xview_strides[inner_ii] = %(c_prefix)s_STRIDES(%(x)s)[outer_ii] * step;
%(c_prefix)s_STRIDES(%(x)s)[outer_ii] * step);
inner_ii += 1; inner_ii += 1;
spec_pos += 3; spec_pos += 3;
...@@ -788,46 +729,41 @@ class Subtensor(Op): ...@@ -788,46 +729,41 @@ class Subtensor(Op):
{ {
if (idx < %(c_prefix)s_DIMS(%(x)s)[outer_ii]) if (idx < %(c_prefix)s_DIMS(%(x)s)[outer_ii])
{ {
ptr += %(c_prefix)s_STRIDES(%(x)s)[outer_ii] * idx * xview_offset += %(c_prefix)s_STRIDES(%(x)s)[outer_ii] * idx *
%(strides_mul)s; %(strides_mul)s;
} }
else else
{ {
PyErr_Format(PyExc_IndexError,"index out of bounds"); PyErr_Format(PyExc_IndexError,"index out of bounds");
Py_XDECREF(%(xview)s);
%(fail)s; %(fail)s;
} }
} }
else else
{ {
PyErr_Format(PyExc_IndexError,"index out of bounds"); PyErr_Format(PyExc_IndexError,"index out of bounds");
Py_XDECREF(%(xview)s);
%(fail)s; %(fail)s;
} }
spec_pos += 1; spec_pos += 1;
} }
} }
%(set_data)s(%(xview)s, ptr, (PyObject*)NULL); assert (inner_ii <= %(view_ndim)s);
assert (inner_ii <= %(c_prefix)s_NDIM(%(xview)s)); while (inner_ii < %(view_ndim)s)
while (inner_ii < %(c_prefix)s_NDIM(%(xview)s))
{ {
assert (outer_ii < %(c_prefix)s_NDIM(%(x)s)); assert (outer_ii < %(c_prefix)s_NDIM(%(x)s));
%(set_dim)s(%(xview)s, inner_ii, xview_dims[inner_ii] = %(c_prefix)s_DIMS(%(x)s)[outer_ii];
%(c_prefix)s_DIMS(%(x)s)[outer_ii]); xview_strides[inner_ii] = %(c_prefix)s_STRIDES(%(x)s)[outer_ii];
%(set_stride)s(%(xview)s, inner_ii,
%(c_prefix)s_STRIDES(%(x)s)[outer_ii]);
inner_ii += 1; inner_ii += 1;
outer_ii += 1; outer_ii += 1;
} }
%(update_flags)s
""" % locals() """ % locals()
# print rval # print rval
return rval return rval
@staticmethod @staticmethod
def helper_c_code_cache_version(): def helper_c_code_cache_version():
return (5,) return (6,)
def c_code(self, node, name, inputs, outputs, sub): # DEBUG def c_code(self, node, name, inputs, outputs, sub): # DEBUG
if not isinstance(node.inputs[0].type, theano.tensor.TensorType): if not isinstance(node.inputs[0].type, theano.tensor.TensorType):
...@@ -838,36 +774,45 @@ class Subtensor(Op): ...@@ -838,36 +774,45 @@ class Subtensor(Op):
view_ndim = node.outputs[0].ndim view_ndim = node.outputs[0].ndim
fail = sub['fail'] fail = sub['fail']
decl = "PyArrayObject * xview = NULL;"
get_xview = self.helper_c_code(node, name, inputs, outputs, sub,
self.idx_list, view_ndim)
build_view = """ build_view = """
//TODO: give this Op a second output so that this view can be cached //TODO: give this Op a second output so that this view can be cached
//TODO: alternatively, fix the memory leak on failure //TODO: alternatively, fix the memory leak on failure
Py_INCREF(PyArray_DESCR(%(x)s)); Py_INCREF(PyArray_DESCR(%(x)s));
PyArrayObject * xview = (PyArrayObject*)PyArray_NewFromDescr( xview = (PyArrayObject*)PyArray_NewFromDescr(
&PyArray_Type, &PyArray_Type,
PyArray_DESCR(%(x)s), PyArray_DESCR(%(x)s),
%(view_ndim)s, %(view_ndim)s,
PyArray_DIMS(%(x)s), xview_dims,
PyArray_STRIDES(%(x)s), xview_strides,
PyArray_DATA(%(x)s), PyArray_BYTES(%(x)s) + xview_offset,
%(x)s->flags, PyArray_FLAGS(%(x)s),
NULL); NULL);
assert (PyArray_NDIM(xview) == %(view_ndim)s);
if (!xview) if (!xview)
{ {
%(fail)s; %(fail)s;
} }
""" % locals() """ % locals()
get_xview = self.helper_c_code(node, name, inputs, outputs, sub,
self.idx_list)
finish_view = """ finish_view = """
if (%(z)s) Py_DECREF(%(z)s); //This is needed for NumPy 1.5, but not 1.7.2
PyArray_UpdateFlags(xview, NPY_ARRAY_C_CONTIGUOUS| NPY_ARRAY_F_CONTIGUOUS);
Py_XDECREF(%(z)s);
Py_INCREF(py_%(x)s); Py_INCREF(py_%(x)s);
#if NPY_API_VERSION < 0x00000007
PyArray_BASE(xview) = py_%(x)s; PyArray_BASE(xview) = py_%(x)s;
#else
PyArray_SetBaseObject(xview, py_%(x)s);
#endif
assert(py_%(x)s == (PyObject*)%(x)s); assert(py_%(x)s == (PyObject*)%(x)s);
%(z)s = xview; %(z)s = xview;
""" % locals() """ % locals()
return build_view + "{" + get_xview + "}" + finish_view return decl + get_xview + build_view + finish_view
def c_code_cache_version(self): def c_code_cache_version(self):
hv = self.helper_c_code_cache_version() hv = self.helper_c_code_cache_version()
...@@ -1150,6 +1095,9 @@ class IncSubtensor(Op): ...@@ -1150,6 +1095,9 @@ class IncSubtensor(Op):
(x, y) + inputs, (x, y) + inputs,
[x.type()]) [x.type()])
def decl_view(self):
return "PyArrayObject * zview = NULL;"
def perform(self, node, inputs, out_): def perform(self, node, inputs, out_):
out, = out_ out, = out_
x, y = inputs[:2] x, y = inputs[:2]
...@@ -1237,16 +1185,28 @@ class IncSubtensor(Op): ...@@ -1237,16 +1185,28 @@ class IncSubtensor(Op):
} }
else else
{ {
if (%(z)s) Py_DECREF(%(z)s); Py_XDECREF(%(z)s);
%(z)s = %(copy_of_x)s; %(z)s = %(copy_of_x)s;
} }
""" % locals() """ % locals()
# get info needed to make zview: a view of %(z)s
helper_args = self.get_helper_c_code_args()
get_zview = Subtensor.helper_c_code(
node=node,
name=name,
inputs=outputs[:1] + inputs[2:],
outputs=outputs,
sub=sub,
idx_list=self.idx_list,
view_ndim=view_ndim,
** helper_args
)
#Make a view on the output, as we will write into it.
alloc_zview = self.make_view_array(z, view_ndim) alloc_zview = self.make_view_array(z, view_ndim)
# On GPU, it takes two steps to make a view
link_zview = self.link_view_array(z, fail)
#Make a first view on the output, as we will write into it.
build_view = """ build_view = """
//TODO: give this Op a second output so that this view can be cached //TODO: give this Op a second output so that this view can be cached
//TODO: alternatively, fix the memory leak on failure //TODO: alternatively, fix the memory leak on failure
...@@ -1255,21 +1215,7 @@ class IncSubtensor(Op): ...@@ -1255,21 +1215,7 @@ class IncSubtensor(Op):
{ {
%(fail)s; %(fail)s;
} }
%(link_zview)s;
""" % locals() """ % locals()
# make zview actually a view of %(z)s
helper_args = self.get_helper_c_code_args()
helper_args['view_name'] = 'zview'
get_zview = self.define_set_data() + \
Subtensor.helper_c_code(
node=node,
name=name,
inputs=outputs[:1] + inputs[2:],
outputs=outputs,
sub=sub,
idx_list=self.idx_list,
** helper_args
)
copy_into = self.copy_into("zview", y) copy_into = self.copy_into("zview", y)
...@@ -1289,12 +1235,12 @@ class IncSubtensor(Op): ...@@ -1289,12 +1235,12 @@ class IncSubtensor(Op):
%(add_to_zview)s %(add_to_zview)s
} }
""" % locals() """ % locals()
return (self.decl_view() +
return (copy_input_if_necessary copy_input_if_necessary +
+ build_view get_zview +
+ "{" + get_zview + "}" build_view +
+ make_modification make_modification +
+ "Py_DECREF(zview);" "Py_DECREF(zview);"
) )
def do_type_checking(self, node): def do_type_checking(self, node):
...@@ -1344,16 +1290,18 @@ class IncSubtensor(Op): ...@@ -1344,16 +1290,18 @@ class IncSubtensor(Op):
""" """
return """Py_INCREF(PyArray_DESCR(%(x)s)); return """Py_INCREF(PyArray_DESCR(%(x)s));
PyArrayObject * zview = zview = (PyArrayObject*)PyArray_NewFromDescr(
(PyArrayObject*)PyArray_NewFromDescr(
&PyArray_Type, &PyArray_Type,
PyArray_DESCR(%(x)s), PyArray_DESCR(%(x)s),
%(view_ndim)s, %(view_ndim)s,
PyArray_DIMS(%(x)s), xview_dims, //PyArray_DIMS(%(x)s),
PyArray_STRIDES(%(x)s), xview_strides, //PyArray_STRIDES(%(x)s),
PyArray_DATA(%(x)s), PyArray_BYTES(%(x)s) + xview_offset, //PyArray_DATA(%(x)s),
%(x)s->flags, PyArray_FLAGS(%(x)s),
NULL)""" % locals() NULL);
//This is needed for NumPy 1.5, but not 1.7.2
PyArray_UpdateFlags(zview, NPY_ARRAY_C_CONTIGUOUS| NPY_ARRAY_F_CONTIGUOUS);
""" % locals()
def get_helper_c_code_args(self): def get_helper_c_code_args(self):
""" Return a dictionary of arguments to pass to helper_c_code.""" """ Return a dictionary of arguments to pass to helper_c_code."""
...@@ -1369,24 +1317,6 @@ class IncSubtensor(Op): ...@@ -1369,24 +1317,6 @@ class IncSubtensor(Op):
""" """
return """PyArray_CopyInto(%(view)s, %(source)s)""" % locals() return """PyArray_CopyInto(%(view)s, %(source)s)""" % locals()
def define_set_data(self):
""" Returns C code used to define any macros used in the
set data argument to the helper C code. """
return ""
def link_view_array(self, x, fail):
""" Returns code to complete making zview a view of x"""
# On CPU there is nothing to do, make_view_array already did this
return ""
def set_view_base(self, x, fail):
""" Returns code to make zview be a correct view of x,
after helper_c_code is done messing with x"""
# On CPU there is nothing to do
return ""
def add_to_zview(self, x, fail): def add_to_zview(self, x, fail):
""" Return C code to add x to zview. Should DECREF zview if the """ Return C code to add x to zview. Should DECREF zview if the
add fails.""" add fails."""
...@@ -1567,7 +1497,7 @@ class AdvancedSubtensor1(Op): ...@@ -1567,7 +1497,7 @@ class AdvancedSubtensor1(Op):
output_name = output_names[0] output_name = output_names[0]
fail = sub['fail'] fail = sub['fail']
return """ return """
PyObject *indices; PyArrayObject *indices;
int i_type = PyArray_TYPE(%(i_name)s); int i_type = PyArray_TYPE(%(i_name)s);
if (i_type != NPY_INTP) { if (i_type != NPY_INTP) {
// Cast %(i_name)s to NPY_INTP (expected by PyArray_TakeFrom), // Cast %(i_name)s to NPY_INTP (expected by PyArray_TakeFrom),
...@@ -1602,13 +1532,13 @@ class AdvancedSubtensor1(Op): ...@@ -1602,13 +1532,13 @@ class AdvancedSubtensor1(Op):
%(fail)s; %(fail)s;
} }
} }
indices = PyArray_Cast(%(i_name)s, NPY_INTP); indices = (PyArrayObject*) PyArray_Cast(%(i_name)s, NPY_INTP);
if (indices == NULL) { if (indices == NULL) {
%(fail)s; %(fail)s;
} }
} }
else { else {
indices = (PyObject *)%(i_name)s; indices = %(i_name)s;
Py_INCREF(indices); Py_INCREF(indices);
} }
if (%(output_name)s != NULL) { if (%(output_name)s != NULL) {
...@@ -1637,7 +1567,7 @@ class AdvancedSubtensor1(Op): ...@@ -1637,7 +1567,7 @@ class AdvancedSubtensor1(Op):
} }
} }
%(output_name)s = (PyArrayObject*)PyArray_TakeFrom( %(output_name)s = (PyArrayObject*)PyArray_TakeFrom(
%(a_name)s, indices, 0, %(output_name)s, NPY_RAISE); %(a_name)s, (PyObject*)indices, 0, %(output_name)s, NPY_RAISE);
Py_DECREF(indices); Py_DECREF(indices);
if (%(output_name)s == NULL) %(fail)s; if (%(output_name)s == NULL) %(fail)s;
""" % locals() """ % locals()
......
#from nose.plugins.skip import SkipTest from copy import copy
#import traceback from unittest import TestCase
import itertools
import sys
import theano.tensor as T
from theano import tensor
from theano.compat import PY3, exc_message
from theano.gof.python25 import product as itertools_product
from theano.gof.python25 import any
from theano.printing import pp
import numpy import numpy
import theano
from numpy import (arange, array, common_type, complex64, complex128, float32, from numpy import (arange, array, common_type, complex64, complex128, float32,
float64, newaxis, shape, transpose, zeros) float64, newaxis, shape, transpose, zeros)
from numpy.testing import assert_array_almost_equal from numpy.testing import assert_array_almost_equal
#from numpy.testing import dec
#from numpy.testing.noseclasses import KnownFailureTest import theano
import theano.tensor as T
from theano import tensor, Param, shared, config
from theano.compat import exc_message
from theano.gof.python25 import product as itertools_product
from theano.gof.python25 import any
from theano.printing import pp
from theano.tensor.blas import (_dot22, _dot22scalar, res_is_a, _as_scalar, from theano.tensor.blas import (_dot22, _dot22scalar, res_is_a, _as_scalar,
_is_real_matrix, _gemm_canonicalize, _is_real_matrix, _gemm_canonicalize,
_factor_canonicalized, Gemm, Gemv, _factor_canonicalized, Gemm, Gemv,
gemm_inplace, gemm_no_inplace, gemm_inplace, gemm_no_inplace,
InconsistencyError, Ger, ger, ger_destructive) InconsistencyError, Ger, ger, ger_destructive)
from unittest import TestCase
from theano.tests import unittest_tools from theano.tests import unittest_tools
from copy import copy, deepcopy
from theano import Param, shared, config
from test_basic import (_approx_eq, as_tensor_variable, inplace_func, from test_basic import (_approx_eq, as_tensor_variable, inplace_func,
compile, inplace) compile, inplace)
#, constant, eval_outputs) #, constant, eval_outputs)
...@@ -361,11 +353,8 @@ class t_gemm(TestCase): ...@@ -361,11 +353,8 @@ class t_gemm(TestCase):
z = tz.get_value(borrow=True, return_internal_type=True) z = tz.get_value(borrow=True, return_internal_type=True)
z[:, :, i] = z_i z[:, :, i] = z_i
self.assertTrue( unittest_tools.assert_allclose(z_after[:, :, i],
_approx_eq(z_after[:, :, i], tz.get_value(borrow=True)[:, :, i])
tz.get_value(borrow=True)[:, :, i]),
(z_orig[:, :, i], z_after[:, :, i],
z[:, :, i], z_after[:, :, i] - z[:, :, i]))
tz_i = gemm_no_inplace(tz[:, :, i], ta, tx[ tz_i = gemm_no_inplace(tz[:, :, i], ta, tx[
:, :, i], ty[:, :, i], tb) :, :, i], ty[:, :, i], tb)
...@@ -374,11 +363,8 @@ class t_gemm(TestCase): ...@@ -374,11 +363,8 @@ class t_gemm(TestCase):
mode=compile.Mode(optimizer=None, linker=l)) mode=compile.Mode(optimizer=None, linker=l))
for j in xrange(3): for j in xrange(3):
g_i() g_i()
self.assertTrue( unittest_tools.assert_allclose(z_after[:, :, i],
_approx_eq(z_after[:, :, i], tz.get_value(borrow=True)[:, :, i])
tz.get_value(borrow=True)[:, :, i]),
(z_orig[:, :, i], z_after[:, :, i],
z[:, :, i], z_after[:, :, i] - z[:, :, i]))
t(C, A, B) t(C, A, B)
t(C.transpose((1, 0, 2)), A, B) t(C.transpose((1, 0, 2)), A, B)
......
...@@ -54,7 +54,7 @@ class Test_inc_subtensor(unittest.TestCase): ...@@ -54,7 +54,7 @@ class Test_inc_subtensor(unittest.TestCase):
else: else:
expected_result[:, :val_sl2_end] += val_inc expected_result[:, :val_sl2_end] += val_inc
self.assertTrue(numpy.array_equal(result, expected_result)) utt.assert_allclose(result, expected_result)
def test_wrong_dims(self): def test_wrong_dims(self):
a = tt.matrix() a = tt.matrix()
...@@ -122,7 +122,7 @@ class Test_inc_subtensor(unittest.TestCase): ...@@ -122,7 +122,7 @@ class Test_inc_subtensor(unittest.TestCase):
else: else:
expected_result[:, sl3, :val_sl2_end] += val_inc expected_result[:, sl3, :val_sl2_end] += val_inc
self.assertTrue(numpy.array_equal(result, expected_result)) utt.assert_allclose(result, expected_result)
def test_grad_inc_set(self): def test_grad_inc_set(self):
def inc_slice(*s): def inc_slice(*s):
......
...@@ -446,8 +446,9 @@ class TensorType(Type): ...@@ -446,8 +446,9 @@ class TensorType(Type):
%(fail)s %(fail)s
} }
// We expect %(type_num)s // We expect %(type_num)s
type_num_%(name)s = ((PyArrayObject*)py_%(name)s)->descr->type_num; type_num_%(name)s = PyArray_TYPE((PyArrayObject*) py_%(name)s);
if (!PyArray_ISALIGNED(py_%(name)s)) { if (!PyArray_ISALIGNED((PyArrayObject*) py_%(name)s)) {
PyArrayObject * tmp = (PyArrayObject*) py_%(name)s;
PyErr_Format(PyExc_NotImplementedError, PyErr_Format(PyExc_NotImplementedError,
"expected an aligned array of type %%ld " "expected an aligned array of type %%ld "
"(%(type_num)s), got non-aligned array of type %%ld" "(%(type_num)s), got non-aligned array of type %%ld"
...@@ -456,19 +457,19 @@ class TensorType(Type): ...@@ -456,19 +457,19 @@ class TensorType(Type):
" and 3 last strides %%ld %%ld, %%ld.", " and 3 last strides %%ld %%ld, %%ld.",
(long int) %(type_num)s, (long int) %(type_num)s,
(long int) type_num_%(name)s, (long int) type_num_%(name)s,
(long int) PyArray_NDIM(py_%(name)s), (long int) PyArray_NDIM(tmp),
(long int) PyArray_NDIM(py_%(name)s) >= 3 ? (long int) PyArray_NDIM(tmp) >= 3 ?
PyArray_DIMS(py_%(name)s)[PyArray_NDIM(py_%(name)s)-3] : -1, PyArray_DIMS(tmp)[PyArray_NDIM(tmp)-3] : -1,
(long int) PyArray_NDIM(py_%(name)s) >= 2 ? (long int) PyArray_NDIM(tmp) >= 2 ?
PyArray_DIMS(py_%(name)s)[PyArray_NDIM(py_%(name)s)-2] : -1, PyArray_DIMS(tmp)[PyArray_NDIM(tmp)-2] : -1,
(long int) PyArray_NDIM(py_%(name)s) >= 1 ? (long int) PyArray_NDIM(tmp) >= 1 ?
PyArray_DIMS(py_%(name)s)[PyArray_NDIM(py_%(name)s)-1] : -1, PyArray_DIMS(tmp)[PyArray_NDIM(tmp)-1] : -1,
(long int) PyArray_NDIM(py_%(name)s) >= 3 ? (long int) PyArray_NDIM(tmp) >= 3 ?
PyArray_STRIDES(py_%(name)s)[PyArray_NDIM(py_%(name)s)-3] : -1, PyArray_STRIDES(tmp)[PyArray_NDIM(tmp)-3] : -1,
(long int) PyArray_NDIM(py_%(name)s) >= 2 ? (long int) PyArray_NDIM(tmp) >= 2 ?
PyArray_STRIDES(py_%(name)s)[PyArray_NDIM(py_%(name)s)-2] : -1, PyArray_STRIDES(tmp)[PyArray_NDIM(tmp)-2] : -1,
(long int) PyArray_NDIM(py_%(name)s) >= 1 ? (long int) PyArray_NDIM(tmp) >= 1 ?
PyArray_STRIDES(py_%(name)s)[PyArray_NDIM(py_%(name)s)-1] : -1 PyArray_STRIDES(tmp)[PyArray_NDIM(tmp)-1] : -1
); );
%(fail)s %(fail)s
} }
...@@ -508,7 +509,7 @@ class TensorType(Type): ...@@ -508,7 +509,7 @@ class TensorType(Type):
{Py_XINCREF(py_%(name)s);} {Py_XINCREF(py_%(name)s);}
if (!PyArray_ISALIGNED(py_%(name)s)) { if (%(name)s && !PyArray_ISALIGNED((PyArrayObject*) py_%(name)s)) {
PyErr_Format(PyExc_NotImplementedError, PyErr_Format(PyExc_NotImplementedError,
"c_sync: expected an aligned array of type %%ld " "c_sync: expected an aligned array of type %%ld "
"(%(type_num)s), got non-aligned array of type %%ld" "(%(type_num)s), got non-aligned array of type %%ld"
...@@ -517,19 +518,19 @@ class TensorType(Type): ...@@ -517,19 +518,19 @@ class TensorType(Type):
" and 3 last strides %%ld %%ld, %%ld.", " and 3 last strides %%ld %%ld, %%ld.",
(long int) %(type_num)s, (long int) %(type_num)s,
(long int) type_num_%(name)s, (long int) type_num_%(name)s,
(long int) PyArray_NDIM(py_%(name)s), (long int) PyArray_NDIM(%(name)s),
(long int) PyArray_NDIM(py_%(name)s) >= 3 ? (long int) PyArray_NDIM(%(name)s) >= 3 ?
PyArray_DIMS(py_%(name)s)[PyArray_NDIM(py_%(name)s)-3] : -1, PyArray_DIMS(%(name)s)[PyArray_NDIM(%(name)s)-3] : -1,
(long int) PyArray_NDIM(py_%(name)s) >= 2 ? (long int) PyArray_NDIM(%(name)s) >= 2 ?
PyArray_DIMS(py_%(name)s)[PyArray_NDIM(py_%(name)s)-2] : -1, PyArray_DIMS(%(name)s)[PyArray_NDIM(%(name)s)-2] : -1,
(long int) PyArray_NDIM(py_%(name)s) >= 1 ? (long int) PyArray_NDIM(%(name)s) >= 1 ?
PyArray_DIMS(py_%(name)s)[PyArray_NDIM(py_%(name)s)-1] : -1, PyArray_DIMS(%(name)s)[PyArray_NDIM(%(name)s)-1] : -1,
(long int) PyArray_NDIM(py_%(name)s) >= 3 ? (long int) PyArray_NDIM(%(name)s) >= 3 ?
PyArray_STRIDES(py_%(name)s)[PyArray_NDIM(py_%(name)s)-3] : -1, PyArray_STRIDES(%(name)s)[PyArray_NDIM(%(name)s)-3] : -1,
(long int) PyArray_NDIM(py_%(name)s) >= 2 ? (long int) PyArray_NDIM(%(name)s) >= 2 ?
PyArray_STRIDES(py_%(name)s)[PyArray_NDIM(py_%(name)s)-2] : -1, PyArray_STRIDES(%(name)s)[PyArray_NDIM(%(name)s)-2] : -1,
(long int) PyArray_NDIM(py_%(name)s) >= 1 ? (long int) PyArray_NDIM(%(name)s) >= 1 ?
PyArray_STRIDES(py_%(name)s)[PyArray_NDIM(py_%(name)s)-1] : -1 PyArray_STRIDES(%(name)s)[PyArray_NDIM(%(name)s)-1] : -1
); );
%(fail)s %(fail)s
} }
...@@ -555,7 +556,7 @@ class TensorType(Type): ...@@ -555,7 +556,7 @@ class TensorType(Type):
def c_code_cache_version(self): def c_code_cache_version(self):
scalar_version = scal.Scalar(self.dtype).c_code_cache_version() scalar_version = scal.Scalar(self.dtype).c_code_cache_version()
if scalar_version: if scalar_version:
return (10,) + scalar_version return (11,) + scalar_version
else: else:
return () return ()
......
...@@ -919,9 +919,9 @@ class Fibby(theano.Op): ...@@ -919,9 +919,9 @@ class Fibby(theano.Op):
if (!%(y)s) if (!%(y)s)
%(fail)s; %(fail)s;
{//New scope needed to make compilation work {//New scope needed to make compilation work
dtype_%(y)s * y = (dtype_%(y)s*)%(y)s->data; dtype_%(y)s * y = (dtype_%(y)s*)PyArray_DATA(%(y)s);
dtype_%(x)s * x = (dtype_%(x)s*)%(x)s->data; dtype_%(x)s * x = (dtype_%(x)s*)PyArray_DATA(%(x)s);
for (int i = 2; i < %(x)s->dimensions[0]; ++i) for (int i = 2; i < PyArray_DIMS(%(x)s)[0]; ++i)
y[i] = y[i-1]*y[i-2] + x[i]; y[i] = y[i-1]*y[i-2] + x[i];
} }
""" % locals() """ % locals()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论