提交 d7ca497f 作者: James Bergstra

Modified elemwise to reuse output storage when possible, which sped it up.

上级 7c8d604f
...@@ -202,8 +202,8 @@ class RecAlgo(object): ...@@ -202,8 +202,8 @@ class RecAlgo(object):
class NaiveAlgo(object): class NaiveAlgo(object):
verbose = False verbose = False
#cache_version = () cache_version = ()
cache_version = ('debug', 2) cache_version = ('debug', 3)
def __init__(self, scalar_op): def __init__(self, scalar_op):
self.scalar_op = scalar_op self.scalar_op = scalar_op
...@@ -893,10 +893,12 @@ class NaiveAlgo(object): ...@@ -893,10 +893,12 @@ class NaiveAlgo(object):
for oname in outputs: for oname in outputs:
print >> sio, """ print >> sio, """
if (cnda_%(oname)s) { for (int i = 0; (i< %(nd)s) && (cnda_%(oname)s); ++i) {
//TODO: check if we can maybe use existing storage if (dims[i] != CudaNdarray_HOST_DIMS(cnda_%(oname)s)[i])
Py_DECREF(cnda_%(oname)s); {
cnda_%(oname)s = NULL; Py_DECREF(cnda_%(oname)s);
cnda_%(oname)s = NULL;
}
} }
if (NULL == cnda_%(oname)s) if (NULL == cnda_%(oname)s)
{ {
......
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论