提交 d7ca497f 作者: James Bergstra

Modified elemwise to reuse output storage when possible, which sped it up.

上级 7c8d604f
......@@ -202,8 +202,8 @@ class RecAlgo(object):
class NaiveAlgo(object):
verbose = False
#cache_version = ()
cache_version = ('debug', 2)
cache_version = ()
cache_version = ('debug', 3)
def __init__(self, scalar_op):
self.scalar_op = scalar_op
......@@ -893,11 +893,13 @@ class NaiveAlgo(object):
for oname in outputs:
print >> sio, """
if (cnda_%(oname)s) {
//TODO: check if we can maybe use existing storage
for (int i = 0; (i< %(nd)s) && (cnda_%(oname)s); ++i) {
if (dims[i] != CudaNdarray_HOST_DIMS(cnda_%(oname)s)[i])
{
Py_DECREF(cnda_%(oname)s);
cnda_%(oname)s = NULL;
}
}
if (NULL == cnda_%(oname)s)
{
cnda_%(oname)s = (CudaNdarray*)CudaNdarray_new_null();
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论