提交 d606816a authored 作者: Frederic Bastien's avatar Frederic Bastien

Small modifications to allow making ops that use pycuda-generated functions.

上级 51c78fb1
...@@ -109,7 +109,7 @@ if cuda_available: ...@@ -109,7 +109,7 @@ if cuda_available:
import basic_ops import basic_ops
from basic_ops import (GpuFromHost, HostFromGpu, GpuElemwise, from basic_ops import (GpuFromHost, HostFromGpu, GpuElemwise,
GpuDimShuffle, GpuSum, GpuReshape, GpuDimShuffle, GpuSum, GpuReshape, GpuContiguous,
GpuSubtensor, GpuIncSubtensor, GpuFlatten, GpuShape, GpuAlloc, GpuSubtensor, GpuIncSubtensor, GpuFlatten, GpuShape, GpuAlloc,
GpuJoin,fscalar, fscalar, fvector, fmatrix, frow, fcol, ftensor3, ftensor4 GpuJoin,fscalar, fscalar, fvector, fmatrix, frow, fcol, ftensor3, ftensor4
, scalar, vector, matrix, row, col, tensor3, tensor4) , scalar, vector, matrix, row, col, tensor3, tensor4)
......
...@@ -1894,6 +1894,43 @@ class GpuAlloc(Op): ...@@ -1894,6 +1894,43 @@ class GpuAlloc(Op):
gpu_alloc = GpuAlloc() gpu_alloc = GpuAlloc()
class GpuContiguous(Op):
    """
    Op that returns a C-contiguous version of its CudaNdarray input.

    If the input is already C-contiguous, the output aliases it (with an
    extra reference); otherwise the data is copied into a C-contiguous
    output buffer, reusing the previous output storage when its
    dimensions still match.
    """

    def make_node(self, input):
        """Coerce `input` to a CudaNdarray variable and build the Apply node.

        The output has the same type (dtype/broadcastable pattern) as the
        input; only its memory layout is guaranteed to change.
        """
        input = as_cuda_ndarray_variable(input)
        return Apply(self, [input], [input.type()])

    def __str__(self):
        return self.__class__.__name__

    def c_code(self, node, name, inputs, outputs, sub):
        """Return the C implementation for this Apply node.

        Fast path: when the input is already C-contiguous, the output is
        made to alias it. Otherwise the output is (re)allocated via
        CudaNdarray_Copy when missing or mis-shaped, or filled in place
        with CudaNdarray_CopyFromCudaNdarray when reusable.
        """
        # NOTE: `input`, `z`, `fail` and the loop variable `i` must keep
        # these exact names -- the C template below reads them via locals().
        input, = inputs
        z, = outputs
        fail = sub['fail']
        code = """
        {
        if (CudaNdarray_is_c_contiguous(%(input)s)){
            Py_XDECREF(%(z)s);
            %(z)s = %(input)s;
            Py_INCREF(%(z)s);
        } else if ((NULL == %(z)s)""" % locals()
        # Emit one dimension-mismatch test per dimension of the input: a
        # previously-allocated output can only be reused if every
        # dimension still matches.
        for i in range(len(node.inputs[0].type.broadcastable)):
            code += "\n|| (CudaNdarray_HOST_DIMS(%(input)s)[%(i)s] != CudaNdarray_HOST_DIMS(%(z)s)[%(i)s])" % locals()
        code += """)
        {
            Py_XDECREF(%(z)s);
            %(z)s = (CudaNdarray*)CudaNdarray_Copy(%(input)s);
            if (!%(z)s)
            {
                %(fail)s;
            }
        }else if(CudaNdarray_CopyFromCudaNdarray(%(z)s,%(input)s)){
            %(fail)s;
        }
        }
        """ % locals()
        return code

# Singleton instance: optimizations insert this op to force contiguity.
gpu_contiguous = GpuContiguous()
# Those are predefined CudaNdarrayType as done in tensor.basic # Those are predefined CudaNdarrayType as done in tensor.basic
# Useful mostly for tests, as the gpu ops are inserted automatically... # Useful mostly for tests, as the gpu ops are inserted automatically...
......
...@@ -1584,6 +1584,12 @@ static PyGetSetDef CudaNdarray_getset[] = { ...@@ -1584,6 +1584,12 @@ static PyGetSetDef CudaNdarray_getset[] = {
(setter)CudaNdarray_set_strides, (setter)CudaNdarray_set_strides,
"data pointer strides (in elements)", "data pointer strides (in elements)",
NULL}, NULL},
//gpudata is needed to allow calling pycuda functions with CudaNdarray inputs.
{"gpudata",
(getter)CudaNdarray_get_dev_data,
NULL,//setter)CudaNdarray_set_dev_data,
"device data pointer",
NULL},
{"_dev_data", {"_dev_data",
(getter)CudaNdarray_get_dev_data, (getter)CudaNdarray_get_dev_data,
(setter)CudaNdarray_set_dev_data, (setter)CudaNdarray_set_dev_data,
...@@ -1599,6 +1605,12 @@ static PyGetSetDef CudaNdarray_getset[] = { ...@@ -1599,6 +1605,12 @@ static PyGetSetDef CudaNdarray_getset[] = {
NULL, NULL,
"Return the number of element in this objects.", "Return the number of element in this objects.",
NULL}, NULL},
//mem_size is needed for pycuda.elementwise.ElementwiseKernel. Why do they use size and mem_size with the same value?
{"mem_size",
(getter)CudaNdarray_SIZE_Object,
NULL,
"Return the number of element in this objects.",
NULL},
{NULL, NULL, NULL, NULL} /* Sentinel */ {NULL, NULL, NULL, NULL} /* Sentinel */
}; };
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论