提交 ffd29adf 作者: Frederic

fix GpuElemwise version for ndim==0

上级 d99cb9df
...@@ -599,12 +599,12 @@ class NaiveAlgo(object): ...@@ -599,12 +599,12 @@ class NaiveAlgo(object):
for d in xrange(nd): for d in xrange(nd):
print >> sio, 'std::cerr << " " << local_dims[%(d)s]; ' % locals() print >> sio, 'std::cerr << " " << local_dims[%(d)s]; ' % locals()
print >> sio, 'std::cerr << "\\n";' print >> sio, 'std::cerr << "\\n";'
if nd > 0:
for ipos in xrange(len(node.inputs)): for ipos in xrange(len(node.inputs)):
print >> sio, 'std::cerr << " local_str inputs %(ipos)s: " <<'%locals() + \ print >> sio, 'std::cerr << " local_str inputs %(ipos)s: " <<'%locals() + \
' << " " << '.join(["local_str[%s][%s]" % (ipos, x) for x in xrange(nd)])+'<<"\\n";' ' << " " << '.join(["local_str[%s][%s]" % (ipos, x) for x in xrange(nd)])+'<<"\\n";'
for ipos in xrange(len(node.outputs)): for ipos in xrange(len(node.outputs)):
print >> sio, 'std::cerr << " local_ostr inputs %(ipos)s: " <<'%locals() + \ print >> sio, 'std::cerr << " local_ostr inputs %(ipos)s: " <<'%locals() + \
' << " " << '.join(["local_ostr[%s][%s]" % (ipos, x) for x in xrange(nd)])+'<<"\\n";' ' << " " << '.join(["local_ostr[%s][%s]" % (ipos, x) for x in xrange(nd)])+'<<"\\n";'
print >> sio, """ print >> sio, """
...@@ -642,11 +642,11 @@ class NaiveAlgo(object): ...@@ -642,11 +642,11 @@ class NaiveAlgo(object):
for d in xrange(nd): for d in xrange(nd):
print >> sio, 'std::cerr << " " << local_dims[%(d)s]; '%locals() print >> sio, 'std::cerr << " " << local_dims[%(d)s]; '%locals()
print >> sio, 'std::cerr << "\\n";' print >> sio, 'std::cerr << "\\n";'
if nd > 0:
for ipos in xrange(len(node.inputs)): for ipos in xrange(len(node.inputs)):
print >> sio, 'std::cerr << " local_str %(ipos)s: " <<'%locals()+' << " " << '.join(["local_str[%s][%s]" % (ipos, x) for x in xrange(nd)])+'<<"\\n";' print >> sio, 'std::cerr << " local_str %(ipos)s: " <<'%locals()+' << " " << '.join(["local_str[%s][%s]" % (ipos, x) for x in xrange(nd)])+'<<"\\n";'
for ipos in xrange(len(node.outputs)): for ipos in xrange(len(node.outputs)):
print >> sio, 'std::cerr << " local_ostr %(ipos)s: " <<'%locals()+' << " " << '.join(["local_ostr[%s][%s]" % (ipos, x) for x in xrange(nd)])+'<<"\\n";' print >> sio, 'std::cerr << " local_ostr %(ipos)s: " <<'%locals()+' << " " << '.join(["local_ostr[%s][%s]" % (ipos, x) for x in xrange(nd)])+'<<"\\n";'
# collapse contiguous dimensions (ignoring scalars, generic version(collapse any dimensions, right, left, middle)) # collapse contiguous dimensions (ignoring scalars, generic version(collapse any dimensions, right, left, middle))
# this is a good idea because we make less index calculation in the gpu. # this is a good idea because we make less index calculation in the gpu.
...@@ -729,11 +729,11 @@ nd_collapse_[i]=0; ...@@ -729,11 +729,11 @@ nd_collapse_[i]=0;
for d in xrange(nd): for d in xrange(nd):
print >> sio, 'std::cerr << " " << local_dims[%(d)s]; '%locals() print >> sio, 'std::cerr << " " << local_dims[%(d)s]; '%locals()
print >> sio, 'std::cerr << "\\n";' print >> sio, 'std::cerr << "\\n";'
if nd > 0:
for ipos in xrange(len(node.inputs)): for ipos in xrange(len(node.inputs)):
print >> sio, 'std::cerr << " local_str %(ipos)s: " <<'%locals()+' << " " << '.join(["local_str[%s][%s]"%(ipos, x) for x in xrange(nd)])+'<<"\\n";' print >> sio, 'std::cerr << " local_str %(ipos)s: " <<'%locals()+' << " " << '.join(["local_str[%s][%s]"%(ipos, x) for x in xrange(nd)])+'<<"\\n";'
for ipos in xrange(len(node.outputs)): for ipos in xrange(len(node.outputs)):
print >> sio, 'std::cerr << " local_ostr %(ipos)s: " <<'%locals()+' << " " << '.join(["local_ostr[%s][%s]"%(ipos, x) for x in xrange(nd)])+'<<"\\n";' print >> sio, 'std::cerr << " local_ostr %(ipos)s: " <<'%locals()+' << " " << '.join(["local_ostr[%s][%s]"%(ipos, x) for x in xrange(nd)])+'<<"\\n";'
def launch_Ccontiguous(nodename, scalar_op, sync=True): def launch_Ccontiguous(nodename, scalar_op, sync=True):
kernel_call_args = ["numEls"] kernel_call_args = ["numEls"]
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论