提交 d606816a authored 作者: Frederic Bastien's avatar Frederic Bastien

Some small modifications to allow making ops that use pycuda-generated functions.

上级 51c78fb1
......@@ -109,7 +109,7 @@ if cuda_available:
import basic_ops
from basic_ops import (GpuFromHost, HostFromGpu, GpuElemwise,
GpuDimShuffle, GpuSum, GpuReshape,
GpuDimShuffle, GpuSum, GpuReshape, GpuContiguous,
GpuSubtensor, GpuIncSubtensor, GpuFlatten, GpuShape, GpuAlloc,
GpuJoin,fscalar, fscalar, fvector, fmatrix, frow, fcol, ftensor3, ftensor4
, scalar, vector, matrix, row, col, tensor3, tensor4)
......
......@@ -1894,6 +1894,43 @@ class GpuAlloc(Op):
gpu_alloc = GpuAlloc()
class GpuContiguous(Op):
    """Return a C-contiguous version of a CudaNdarray input.

    The generated C code hands back the input itself (as a new reference)
    when it is already C-contiguous; otherwise the data is copied, either
    into a freshly allocated output or into the previously allocated output
    when its dimensions still match the input.
    """

    # Output 0 may be the very same object as input 0 (see the first branch
    # of the generated C code), so the aliasing must be declared to Theano.
    view_map = {0: [0]}

    def __eq__(self, other):
        # The op carries no parameters: any two instances are equivalent.
        return type(self) == type(other)

    def __hash__(self):
        return hash(type(self))

    def make_node(self, input):
        """Build the Apply node; the output has the same type as the input."""
        input = as_cuda_ndarray_variable(input)
        return Apply(self, [input], [input.type()])

    def __str__(self):
        return self.__class__.__name__

    def c_code(self, node, name, inp, out, sub):
        """Return the C source implementing this op for `node`.

        `inp` and `out` are one-element sequences holding the C variable
        names of the input and of the output; `sub['fail']` is the C
        snippet to run on error.
        """
        # Unpack inside the body: tuple parameters in the signature are
        # Python-2-only syntax (removed by PEP 3113).
        input_name, = inp
        z, = out
        names = dict(input=input_name, z=z, fail=sub['fail'])

        # Branch 1: input already contiguous -> alias it.
        # Branch 2 condition starts here: no output buffer yet ...
        head = (
            "\n"
            "{\n"
            "if (CudaNdarray_is_c_contiguous(%(input)s)){\n"
            "Py_XDECREF(%(z)s);\n"
            "%(z)s = %(input)s;\n"
            "Py_INCREF(%(z)s);\n"
            "} else if ((NULL == %(z)s)"
        ) % names

        # ... or the existing output buffer differs from the input in some
        # dimension (one comparison per input dimension).
        ndim = len(node.inputs[0].type.broadcastable)
        dim_checks = "".join(
            ("\n|| (CudaNdarray_HOST_DIMS(%(input)s)[%(i)s]"
             " != CudaNdarray_HOST_DIMS(%(z)s)[%(i)s])") % dict(names, i=i)
            for i in range(ndim))

        # Branch 2 body: drop the old output and make a fresh copy.
        # Branch 3: reuse the existing output buffer and copy into it.
        tail = (
            ")\n"
            "{\n"
            "Py_XDECREF(%(z)s);\n"
            "%(z)s = (CudaNdarray*)CudaNdarray_Copy(%(input)s);\n"
            "if (!%(z)s)\n"
            "{\n"
            "%(fail)s;\n"
            "}\n"
            "}else if(CudaNdarray_CopyFromCudaNdarray(%(z)s,%(input)s)){\n"
            "%(fail)s;\n"
            "}\n"
            "}\n"
        ) % names

        return head + dim_checks + tail


# Shared module-level instance of the op.
gpu_contiguous = GpuContiguous()
# These are predefined CudaNdarrayTypes, as is done in tensor.basic.
# Useful mostly for tests, as the GPU ops are inserted automatically...
......
......@@ -1584,6 +1584,12 @@ static PyGetSetDef CudaNdarray_getset[] = {
(setter)CudaNdarray_set_strides,
"data pointer strides (in elements)",
NULL},
//gpudata is needed to allow calling pycuda fct with CudaNdarray input.
{"gpudata",
(getter)CudaNdarray_get_dev_data,
NULL,//setter)CudaNdarray_set_dev_data,
"device data pointer",
NULL},
{"_dev_data",
(getter)CudaNdarray_get_dev_data,
(setter)CudaNdarray_set_dev_data,
......@@ -1599,6 +1605,12 @@ static PyGetSetDef CudaNdarray_getset[] = {
NULL,
"Return the number of element in this objects.",
NULL},
//mem_size is needed by pycuda.elementwise.ElementwiseKernel. Why do they use size and mem_size with the same value?
{"mem_size",
(getter)CudaNdarray_SIZE_Object,
NULL,
"Return the number of element in this objects.",
NULL},
{NULL, NULL, NULL, NULL} /* Sentinel */
};
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论