Commit 38694679, authored by notoraptor

Remove Python loop from GpuContiguous's C code.

Parent commit: 0e99b3ad
...@@ -1073,27 +1073,26 @@ class GpuContiguous(Op): ...@@ -1073,27 +1073,26 @@ class GpuContiguous(Op):
context_name=infer_context_name(input)) context_name=infer_context_name(input))
return Apply(self, [input], [input.type()]) return Apply(self, [input], [input.type()])
def c_header_dirs(self):
    """Directories to add to the C compiler's header search path.

    The helper header used by this Op ships next to this module, so the
    module's own directory is the only entry.
    """
    module_dir = os.path.dirname(__file__)
    return [module_dir]
def c_headers(self):
    """C headers to #include in the generated code.

    NOTE(review): reconstructed from a side-by-side diff render — the
    original line fused the pre-change (`<numpy_compat.h>`) and
    post-change (`<gpuarray_helper.h>`) revisions; this is the
    post-change version.  gpuarray_helper.h presumably declares
    theano_size_check() used by c_code() — confirm against upstream.
    """
    return ['<gpuarray_helper.h>']
def c_code_cache_version(self):
    """Cache-version tag for the generated C code.

    NOTE(review): reconstructed from a fused side-by-side diff line
    (old `(3,)` / new `(4,)`); the post-change revision bumps the tag
    to (4,) so modules compiled from the old template are discarded.
    """
    return (4,)
# NOTE(review): the lines below are a side-by-side diff render, not valid
# Python — each physical line concatenates the pre-change and post-change
# revisions of GpuContiguous.c_code.  The post-change version drops the
# Python loop over broadcastable dims (old L26-L28) in favour of a single
# C template that calls theano_size_check(), and substitutes parameters
# via `% dict(input=inp[0], z=out[0], fail=sub['fail'])` instead of
# `% locals()`.  It is NOT reconstructed here because the diff elides the
# output-allocation lines between the two hunks (see the
# "@@ -1102,12 +1101,11" marker below) — recover them from the upstream
# commit before attempting a clean rebuild of this method.
def c_code(self, node, name, inp, out, sub): def c_code(self, node, name, inp, out, sub):
input, = inp return """
z, = out
fail = sub['fail']
str = """
{ {
if (GpuArray_IS_C_CONTIGUOUS(&(%(input)s->ga))){ if (GpuArray_IS_C_CONTIGUOUS(&(%(input)s->ga))) {
Py_XDECREF(%(z)s); Py_XDECREF(%(z)s);
%(z)s = %(input)s; %(z)s = %(input)s;
Py_INCREF(%(z)s); Py_INCREF(%(z)s);
} else if ((NULL == %(z)s)""" % locals() } else if (NULL == %(z)s
for i in xrange(len(node.inputs[0].type.broadcastable)): || !theano_size_check(%(z)s, PyGpuArray_NDIM(%(input)s), PyGpuArray_DIMS(%(input)s),
str += "\n|| (PyGpuArray_DIMS(%(input)s)[%(i)s] != PyGpuArray_DIMS(%(z)s)[%(i)s])" % locals() %(input)s->ga.typecode)
str += """
|| !GpuArray_IS_C_CONTIGUOUS(&(%(z)s->ga))) || !GpuArray_IS_C_CONTIGUOUS(&(%(z)s->ga)))
{ {
Py_XDECREF(%(z)s); Py_XDECREF(%(z)s);
...@@ -1102,12 +1101,11 @@ class GpuContiguous(Op): ...@@ -1102,12 +1101,11 @@ class GpuContiguous(Op):
{ {
%(fail)s; %(fail)s;
} }
}else if(pygpu_move(%(z)s, %(input)s) == -1) { } else if(pygpu_move(%(z)s, %(input)s) == -1) {
%(fail)s; %(fail)s;
} }
} }
""" % locals() """ % dict(input=inp[0], z=out[0], fail=sub['fail'])
return str
# NOTE(review): reconstructed from a fused diff line that repeated the
# (unchanged) statement twice.  Shared module-level instance of the Op,
# used when building graphs.
gpu_contiguous = GpuContiguous()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论