提交 b5c075c7 authored 作者: notoraptor's avatar notoraptor

Try to fix errors.

上级 7c81e166
......@@ -932,7 +932,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
Z = (%(out_type)s *)(((char *)Z)+offset_Z);
""" % locals()
def _assign_init(self, first_item):
def _assign_init(self, first_item, dtype='float32'):
"""
This returns the initial value for myresult.
If the scalar op has an identity value, return it.
......@@ -948,7 +948,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
assert isinstance(self.scalar_op, (scalar.Maximum,
scalar.Minimum))
if self.pre_scalar_op: # TODO: multiple dtypes
dtype = self._acc_dtype(self.acc_dtype)
# dtype = node.inputs[0].dtype
dummy_var = scalar.Scalar(dtype=dtype)()
......@@ -1707,7 +1707,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
""" % locals(), file=sio)
def c_code_cache_version_apply(self, node):
version = [21] # the version corresponding to the c code in this Op
version = [22] # the version corresponding to the c code in this Op
# now we insert versions for the ops on which we depend...
scalar_node = Apply(
......@@ -1718,7 +1718,6 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
for i in node.inputs + node.outputs:
version.extend(Scalar(dtype=i.type.dtype).c_code_cache_version())
version.extend(self.kernel_version(node))
version.extend(self._acc_dtype(self.acc_dtype))
if all(version):
return tuple(version)
else:
......@@ -1744,7 +1743,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
reduce_fct = self._assign_reduce(node, nodename, "myresult",
load_in + "(A[i0])",
{}, True)
reduce_init = self._assign_init(load_in + "(A[0])")
reduce_init = self._assign_init(load_in + "(A[0])", acc_dtype)
kname = "kernel_reduce_ccontig"
k_var = "kernel_reduce_ccontig_" + nodename
sio = StringIO()
......@@ -1783,7 +1782,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
reduce_fct = self._assign_reduce(node, nodename, "myresult",
load_in + "(A[i0 * sA0])",
{}, True)
reduce_init = self._assign_init(load_in + "(A[0])")
reduce_init = self._assign_init(load_in + "(A[0])", acc_dtype)
kname = "kernel_reduce_1"
k_var = "kernel_reduce_1_" + nodename
sio = StringIO()
......@@ -1824,7 +1823,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
reduce_fct = self._assign_reduce(node, nodename, "myresult",
load_in + "(A[i0 * sA0 + i1 * sA1])",
{}, True)
reduce_init = self._assign_init(load_in + "(A[0])")
reduce_init = self._assign_init(load_in + "(A[0])", acc_dtype)
kname = "kernel_reduce_11"
k_var = "kernel_reduce_11_" + nodename
sio = StringIO()
......@@ -1911,7 +1910,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
for i in xrange(nd_in)])
decl, kname, params, k_var = self._k_decl(node, nodename)
init = self._k_init(node, nodename)
reduce_init = self._assign_init(load_in + "(A[%(first_i3)s * %(sA3)s + %(first_i2)s * %(sA2)s + %(first_i1)s * %(sA1)s + i0 * sA0])" % locals())
reduce_init = self._assign_init(load_in + "(A[%(first_i3)s * %(sA3)s + %(first_i2)s * %(sA2)s + %(first_i1)s * %(sA1)s + i0 * sA0])" % locals(), acc_dtype)
reduce_fct = self._assign_reduce(
node, nodename, "myresult",
load_in + "(A[i3 * sA3 + i2 * sA2 + i1 * sA1 + i0 * sA0])",
......@@ -1948,7 +1947,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
reduce_fct = self._assign_reduce(node, nodename, "myresult",
load_in + "(A[i0 * sA0 + i1 * sA1 + i2 * sA2])",
{}, True)
reduce_init = self._assign_init(load_in + "(A[i0 * sA0 + threadIdx.x * sA1 + i2 * sA2])")
reduce_init = self._assign_init(load_in + "(A[i0 * sA0 + threadIdx.x * sA1 + i2 * sA2])", acc_dtype)
kname = "kernel_reduce_010"
k_var = "kernel_reduce_010_" + nodename
sio = StringIO()
......@@ -1995,7 +1994,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
reduce_fct = self._assign_reduce(node, nodename, "myresult",
load_in + "(X[a * sX0 + b * sX1 + c * sX2])",
{}, True)
reduce_init = self._assign_init(load_in + "(X[a * sX0 + 0 * sX1 + c * sX2])")
reduce_init = self._assign_init(load_in + "(X[a * sX0 + 0 * sX1 + c * sX2])", acc_dtype)
kname = "kernel_reduce_010_AD"
k_var = "kernel_reduce_010_AD_" + nodename
sio = StringIO()
......@@ -2062,7 +2061,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
reduce_fct = self._assign_reduce(node, nodename, "myresult",
load_in + "(A[i0 * sA0 + i1 * sA1 + i2 * sA2])",
{}, True)
reduce_init = self._assign_init(load_in + "(A[i0 * sA0 + 0 * sA1 + i2 * sA2])")
reduce_init = self._assign_init(load_in + "(A[i0 * sA0 + 0 * sA1 + i2 * sA2])", acc_dtype)
sio = StringIO()
print("""#include "cluda.h"
......@@ -2096,7 +2095,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
reduce_fct = self._assign_reduce(node, nodename, "myresult",
load_in + "(A[i0 * sA0 + i1 * sA1 + blockIdx.x * sA2])",
{}, True)
reduce_init = self._assign_init(load_in + "(A[blockIdx.x * sA2])")
reduce_init = self._assign_init(load_in + "(A[blockIdx.x * sA2])", acc_dtype)
kname = "kernel_reduce_110"
k_var = "kernel_reduce_110_" + nodename
sio = StringIO()
......@@ -2144,7 +2143,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
reduce_fct = self._assign_reduce(node, nodename, "myresult",
load_in + "(A[i0 * sA0 + i1 * sA1 + i2 * sA2])",
{}, True)
reduce_init = self._assign_init(load_in + "(A[i1 * sA1 + i2 * sA2])")
reduce_init = self._assign_init(load_in + "(A[i1 * sA1 + i2 * sA2])", acc_dtype)
sio = StringIO()
print("""#include "cluda.h"
......@@ -2175,7 +2174,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
reduce_fct = self._assign_reduce(node, nodename, "myresult",
load_in + "(A[i0 * sA0 + i1 * sA1 + i2 * sA2])",
{}, True)
reduce_init = self._assign_init(load_in + "(A[0])")
reduce_init = self._assign_init(load_in + "(A[0])", acc_dtype)
sio = StringIO()
print("""#include "cluda.h"
......@@ -2206,7 +2205,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
reduce_fct = self._assign_reduce(node, nodename, "myresult",
load_in + "(A[i0 * sA0 + i1 * sA1 + i2 * sA2])",
{}, True)
reduce_init = self._assign_init(load_in + "(A[i0 * sA0 + i1 * sA1])")
reduce_init = self._assign_init(load_in + "(A[i0 * sA0 + i1 * sA1])", acc_dtype)
kname = "kernel_reduce_001"
k_var = "kernel_reduce_001_" + nodename
sio = StringIO()
......@@ -2257,7 +2256,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
reduce_fct = self._assign_reduce(node, nodename, "myresult",
load_in + "(A[i0 * sA0 + i1 * sA1 + i2 * sA2 + i3 * sA3])",
{}, True)
reduce_init = self._assign_init(load_in + "(A[i0 * sA0 + i1 * sA1])")
reduce_init = self._assign_init(load_in + "(A[i0 * sA0 + i1 * sA1])", acc_dtype)
sio = StringIO()
print("""#include "cluda.h"
......@@ -2294,7 +2293,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
reduce_fct = self._assign_reduce(node, nodename, "myresult",
load_in + "(A[i0 * sA0 + i1 * sA1 + i2 * sA2 + i3 * sA3])",
{}, True)
reduce_init = self._assign_init(load_in + "(A[i0 * sA0 + i2 * sA2])")
reduce_init = self._assign_init(load_in + "(A[i0 * sA0 + i2 * sA2])", acc_dtype)
sio = StringIO()
print("""#include "cluda.h"
......@@ -2329,7 +2328,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
reduce_fct = self._assign_reduce(node, nodename, "myresult",
load_in + "(A[i0 * sA0 + i1 * sA1 + i2 * sA2 + i3 * sA3])",
{}, True)
reduce_init = self._assign_init(load_in + "(A[0])")
reduce_init = self._assign_init(load_in + "(A[0])", acc_dtype)
sio = StringIO()
print("""#include "cluda.h"
......@@ -2359,7 +2358,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
reduce_fct = self._assign_reduce(node, nodename, "myresult",
load_in + "(A[i0 * sA0 + blockIdx.x * sA1 + i2 * sA2 + i3 * sA3])",
{}, True)
reduce_init = self._assign_init(load_in + "(A[blockIdx.x * sA1])")
reduce_init = self._assign_init(load_in + "(A[blockIdx.x * sA1])", acc_dtype)
kname = "kernel_reduce_1011"
k_var = "kernel_reduce_1011_" + nodename
sio = StringIO()
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论