提交 d7ca497f 作者: James Bergstra

Modified elemwise to reuse existing output storage when possible; this sped up elemwise operations.

上级 7c8d604f
...@@ -202,8 +202,8 @@ class RecAlgo(object): ...@@ -202,8 +202,8 @@ class RecAlgo(object):
class NaiveAlgo(object): class NaiveAlgo(object):
verbose = False verbose = False
#cache_version = () cache_version = ()
cache_version = ('debug', 2) cache_version = ('debug', 3)
def __init__(self, scalar_op): def __init__(self, scalar_op):
self.scalar_op = scalar_op self.scalar_op = scalar_op
...@@ -893,11 +893,13 @@ class NaiveAlgo(object): ...@@ -893,11 +893,13 @@ class NaiveAlgo(object):
for oname in outputs: for oname in outputs:
print >> sio, """ print >> sio, """
if (cnda_%(oname)s) { for (int i = 0; (i< %(nd)s) && (cnda_%(oname)s); ++i) {
//TODO: check if we can maybe use existing storage if (dims[i] != CudaNdarray_HOST_DIMS(cnda_%(oname)s)[i])
{
Py_DECREF(cnda_%(oname)s); Py_DECREF(cnda_%(oname)s);
cnda_%(oname)s = NULL; cnda_%(oname)s = NULL;
} }
}
if (NULL == cnda_%(oname)s) if (NULL == cnda_%(oname)s)
{ {
cnda_%(oname)s = (CudaNdarray*)CudaNdarray_new_null(); cnda_%(oname)s = (CudaNdarray*)CudaNdarray_new_null();
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论