提交 41dad32d 作者: James Bergstra

Added a verbose flag to NaiveAlgo to control printing to stdout and stderr.

上级 73e46362
...@@ -201,6 +201,7 @@ class RecAlgo(object): ...@@ -201,6 +201,7 @@ class RecAlgo(object):
return self.c_src_kernel(node, nodename) + self.c_src_callkernel(node, nodename) return self.c_src_kernel(node, nodename) + self.c_src_callkernel(node, nodename)
class NaiveAlgo(object): class NaiveAlgo(object):
verbose = False
def __init__(self, **kwargs): def __init__(self, **kwargs):
self.__dict__.update(kwargs) self.__dict__.update(kwargs)
...@@ -287,7 +288,7 @@ class NaiveAlgo(object): ...@@ -287,7 +288,7 @@ class NaiveAlgo(object):
#print >> sio, indent, "const float * i%i" % ipos, '= i%i_data', '' #print >> sio, indent, "const float * i%i" % ipos, '= i%i_data', ''
print >> sio, "}" print >> sio, "}"
print sio.getvalue() #print sio.getvalue()
return sio.getvalue() return sio.getvalue()
def c_src_kernel_tiling(self, node, nodename): def c_src_kernel_tiling(self, node, nodename):
...@@ -425,7 +426,7 @@ class NaiveAlgo(object): ...@@ -425,7 +426,7 @@ class NaiveAlgo(object):
n_out = len(node.outputs) n_out = len(node.outputs)
sio = StringIO.StringIO() sio = StringIO.StringIO()
if nd not in (4,): if nd not in (2,):
return sio.getvalue() return sio.getvalue()
# print some leading comments to make the code easier to read # print some leading comments to make the code easier to read
...@@ -620,7 +621,7 @@ class NaiveAlgo(object): ...@@ -620,7 +621,7 @@ class NaiveAlgo(object):
print >> sio, " }" print >> sio, " }"
print >> sio, "}" print >> sio, "}"
print sio.getvalue() #print sio.getvalue()
return sio.getvalue() return sio.getvalue()
def c_src_callkernel(self, node, nodename): def c_src_callkernel(self, node, nodename):
...@@ -672,12 +673,13 @@ class NaiveAlgo(object): ...@@ -672,12 +673,13 @@ class NaiveAlgo(object):
%(output_params)s) %(output_params)s)
{ {
numEls = %(prod_dims)s; numEls = %(prod_dims)s;
""" %locals()
if self.verbose:
print >> sio, """
std::cerr << "calling kernel_%(scalar_op)s_%(nodename)s_%(id_self)s w numEls" << numEls << "\\n"; std::cerr << "calling kernel_%(scalar_op)s_%(nodename)s_%(id_self)s w numEls" << numEls << "\\n";
""" %locals() """ %locals()
# DEBUGPRINT
print >> sio, 'std::cerr << ' + " << ' ' << ".join(['" "']+list("dims[%i]"%di print >> sio, 'std::cerr << ' + " << ' ' << ".join(['" "']+list("dims[%i]"%di
for di in xrange(nd)) + ["'\\n';"]) for di in xrange(nd)) + ["'\\n';"])
# DEBUGPRINT
for ipos in xrange(len(node.inputs)): for ipos in xrange(len(node.inputs)):
print >> sio, """ print >> sio, """
std::cerr << " %(ipos)s " << std::cerr << " %(ipos)s " <<
...@@ -700,7 +702,7 @@ class NaiveAlgo(object): ...@@ -700,7 +702,7 @@ class NaiveAlgo(object):
nd_collapse_size = nd_collapse_size_%(ipos)s; nd_collapse_size = nd_collapse_size_%(ipos)s;
} }
""" %locals() """ %locals()
# DEBUGPRINT if self.verbose:
print >> sio, 'std::cerr << " nd_collapse " << nd_collapse << " " << nd_collapse_size << "\\n";' print >> sio, 'std::cerr << " nd_collapse " << nd_collapse << " " << nd_collapse_size << "\\n";'
for ipos in xrange(len(node.inputs)): for ipos in xrange(len(node.inputs)):
print >> sio, "int local_i%(ipos)s_str[%(nd)s];"%locals() print >> sio, "int local_i%(ipos)s_str[%(nd)s];"%locals()
...@@ -714,7 +716,6 @@ class NaiveAlgo(object): ...@@ -714,7 +716,6 @@ class NaiveAlgo(object):
for d in xrange(nd): for d in xrange(nd):
print >> sio, "local_dims[%(d)s] = (%(d)s == nd_collapse) ? nd_collapse_size : dims[%(d)s];"%locals() print >> sio, "local_dims[%(d)s] = (%(d)s == nd_collapse) ? nd_collapse_size : dims[%(d)s];"%locals()
def launch_Ccontiguous(nodename, id_self, scalar_op): def launch_Ccontiguous(nodename, id_self, scalar_op):
kernel_call_args = ["numEls"] kernel_call_args = ["numEls"]
for ipos in xrange(len(node.inputs)): for ipos in xrange(len(node.inputs)):
...@@ -739,11 +740,12 @@ class NaiveAlgo(object): ...@@ -739,11 +740,12 @@ class NaiveAlgo(object):
return 0; return 0;
""" %locals() """ %locals()
def launch_tile4(): def launch_nd_collapse_2(nodename, id_self, scalar_op):
if (False and nd == 4): # tiling kernel if self.verbose:
print >> sio, """
std::cerr << " Running tiling 2D \\n";
"""
print >> sio, """ print >> sio, """
{
std::cerr << " Running tiling 4D \\n";
dim3 gridDim(dims[0], dims[1]); dim3 gridDim(dims[0], dims[1]);
dim3 blockDim; dim3 blockDim;
if (0) { if (0) {
...@@ -782,8 +784,6 @@ class NaiveAlgo(object): ...@@ -782,8 +784,6 @@ class NaiveAlgo(object):
} }
} }
return 0; return 0;
}
}
""" %locals() """ %locals()
def launch_General(nodename, id_self, scalar_op): def launch_General(nodename, id_self, scalar_op):
...@@ -792,7 +792,7 @@ class NaiveAlgo(object): ...@@ -792,7 +792,7 @@ class NaiveAlgo(object):
kernel_call_args.extend("dims[%i]"%di for di in xrange(nd)) kernel_call_args.extend("dims[%i]"%di for di in xrange(nd))
for ipos in xrange(len(node.inputs)): for ipos in xrange(len(node.inputs)):
kernel_call_args.append( kernel_call_args.append(
", ".join(["i%i_data"%ipos] + list("local_i%i_str[%i]"%(ipos, di) for di in xrange(nd))) ", ".join(["i%i_data"%ipos] + list("i%i_str[%i]"%(ipos, di) for di in xrange(nd)))
) )
#strides = ", ".join("i%i_str[%i]"%(ipos, di) for di in xrange(nd)) #strides = ", ".join("i%i_str[%i]"%(ipos, di) for di in xrange(nd))
#kernel_call_args.append( "%s, i%i_data" % (strides, ipos)) #kernel_call_args.append( "%s, i%i_data" % (strides, ipos))
...@@ -803,8 +803,11 @@ class NaiveAlgo(object): ...@@ -803,8 +803,11 @@ class NaiveAlgo(object):
#strides = ", ".join("o%i_str[%i]"%(ipos, di) for di in xrange(nd)) #strides = ", ".join("o%i_str[%i]"%(ipos, di) for di in xrange(nd))
#kernel_call_args.append( "%s, o%i_data" % (strides, ipos)) #kernel_call_args.append( "%s, o%i_data" % (strides, ipos))
kernel_call_args = ", ".join(kernel_call_args) kernel_call_args = ", ".join(kernel_call_args)
if self.verbose:
print >> sio, """ print >> sio, """
std::cerr << " Running general version \\n"; std::cerr << " Running general version \\n";
"""
print >> sio, """
int threads_per_block = std::min(numEls, (unsigned int)NUM_VECTOR_OP_THREADS_PER_BLOCK); int threads_per_block = std::min(numEls, (unsigned int)NUM_VECTOR_OP_THREADS_PER_BLOCK);
int n_blocks = std::min(numEls/threads_per_block + (numEls %% threads_per_block?1:0), (unsigned int)NUM_VECTOR_OP_BLOCKS); int n_blocks = std::min(numEls/threads_per_block + (numEls %% threads_per_block?1:0), (unsigned int)NUM_VECTOR_OP_BLOCKS);
kernel_%(scalar_op)s_%(nodename)s_%(id_self)s<<<n_blocks, threads_per_block>>>(%(kernel_call_args)s); kernel_%(scalar_op)s_%(nodename)s_%(id_self)s<<<n_blocks, threads_per_block>>>(%(kernel_call_args)s);
...@@ -837,11 +840,13 @@ class NaiveAlgo(object): ...@@ -837,11 +840,13 @@ class NaiveAlgo(object):
def c_support_code_apply(self, node, nodename): def c_support_code_apply(self, node, nodename):
return self.c_src_kernel(node, nodename) \ return "".join([
+ self.c_src_kernel_Ccontiguous(node, nodename) \ self.c_src_kernel(node, nodename),
+ self.c_src_kernel_tiling(node, nodename) \ self.c_src_kernel_Ccontiguous(node, nodename),
+ self.c_src_kernel_tiling_less_registers(node, nodename) \ #self.c_src_kernel_tiling(node, nodename),
+ self.c_src_callkernel(node, nodename) #self.c_src_kernel_tiling_less_registers(node, nodename),
self.c_src_callkernel(node, nodename),
])
def c_code(self, node, nodename, inputs, outputs, sub): def c_code(self, node, nodename, inputs, outputs, sub):
d = dict(sub) d = dict(sub)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论