提交 b5c075c7 作者: notoraptor

Try to fix errors.

上级 7c81e166
...@@ -932,7 +932,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -932,7 +932,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
Z = (%(out_type)s *)(((char *)Z)+offset_Z); Z = (%(out_type)s *)(((char *)Z)+offset_Z);
""" % locals() """ % locals()
def _assign_init(self, first_item): def _assign_init(self, first_item, dtype='float32'):
""" """
This return the initial value for myresult. This return the initial value for myresult.
If the scalar op have an identity value, return it. If the scalar op have an identity value, return it.
...@@ -948,7 +948,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -948,7 +948,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
assert isinstance(self.scalar_op, (scalar.Maximum, assert isinstance(self.scalar_op, (scalar.Maximum,
scalar.Minimum)) scalar.Minimum))
if self.pre_scalar_op: # TODO: multiple dtypes if self.pre_scalar_op: # TODO: multiple dtypes
dtype = self._acc_dtype(self.acc_dtype) # dtype = node.inputs[0].dtype
dummy_var = scalar.Scalar(dtype=dtype)() dummy_var = scalar.Scalar(dtype=dtype)()
...@@ -1707,7 +1707,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -1707,7 +1707,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
""" % locals(), file=sio) """ % locals(), file=sio)
def c_code_cache_version_apply(self, node): def c_code_cache_version_apply(self, node):
version = [21] # the version corresponding to the c code in this Op version = [22] # the version corresponding to the c code in this Op
# now we insert versions for the ops on which we depend... # now we insert versions for the ops on which we depend...
scalar_node = Apply( scalar_node = Apply(
...@@ -1718,7 +1718,6 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -1718,7 +1718,6 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
for i in node.inputs + node.outputs: for i in node.inputs + node.outputs:
version.extend(Scalar(dtype=i.type.dtype).c_code_cache_version()) version.extend(Scalar(dtype=i.type.dtype).c_code_cache_version())
version.extend(self.kernel_version(node)) version.extend(self.kernel_version(node))
version.extend(self._acc_dtype(self.acc_dtype))
if all(version): if all(version):
return tuple(version) return tuple(version)
else: else:
...@@ -1744,7 +1743,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -1744,7 +1743,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
reduce_fct = self._assign_reduce(node, nodename, "myresult", reduce_fct = self._assign_reduce(node, nodename, "myresult",
load_in + "(A[i0])", load_in + "(A[i0])",
{}, True) {}, True)
reduce_init = self._assign_init(load_in + "(A[0])") reduce_init = self._assign_init(load_in + "(A[0])", acc_dtype)
kname = "kernel_reduce_ccontig" kname = "kernel_reduce_ccontig"
k_var = "kernel_reduce_ccontig_" + nodename k_var = "kernel_reduce_ccontig_" + nodename
sio = StringIO() sio = StringIO()
...@@ -1783,7 +1782,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -1783,7 +1782,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
reduce_fct = self._assign_reduce(node, nodename, "myresult", reduce_fct = self._assign_reduce(node, nodename, "myresult",
load_in + "(A[i0 * sA0])", load_in + "(A[i0 * sA0])",
{}, True) {}, True)
reduce_init = self._assign_init(load_in + "(A[0])") reduce_init = self._assign_init(load_in + "(A[0])", acc_dtype)
kname = "kernel_reduce_1" kname = "kernel_reduce_1"
k_var = "kernel_reduce_1_" + nodename k_var = "kernel_reduce_1_" + nodename
sio = StringIO() sio = StringIO()
...@@ -1824,7 +1823,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -1824,7 +1823,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
reduce_fct = self._assign_reduce(node, nodename, "myresult", reduce_fct = self._assign_reduce(node, nodename, "myresult",
load_in + "(A[i0 * sA0 + i1 * sA1])", load_in + "(A[i0 * sA0 + i1 * sA1])",
{}, True) {}, True)
reduce_init = self._assign_init(load_in + "(A[0])") reduce_init = self._assign_init(load_in + "(A[0])", acc_dtype)
kname = "kernel_reduce_11" kname = "kernel_reduce_11"
k_var = "kernel_reduce_11_" + nodename k_var = "kernel_reduce_11_" + nodename
sio = StringIO() sio = StringIO()
...@@ -1911,7 +1910,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -1911,7 +1910,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
for i in xrange(nd_in)]) for i in xrange(nd_in)])
decl, kname, params, k_var = self._k_decl(node, nodename) decl, kname, params, k_var = self._k_decl(node, nodename)
init = self._k_init(node, nodename) init = self._k_init(node, nodename)
reduce_init = self._assign_init(load_in + "(A[%(first_i3)s * %(sA3)s + %(first_i2)s * %(sA2)s + %(first_i1)s * %(sA1)s + i0 * sA0])" % locals()) reduce_init = self._assign_init(load_in + "(A[%(first_i3)s * %(sA3)s + %(first_i2)s * %(sA2)s + %(first_i1)s * %(sA1)s + i0 * sA0])" % locals(), acc_dtype)
reduce_fct = self._assign_reduce( reduce_fct = self._assign_reduce(
node, nodename, "myresult", node, nodename, "myresult",
load_in + "(A[i3 * sA3 + i2 * sA2 + i1 * sA1 + i0 * sA0])", load_in + "(A[i3 * sA3 + i2 * sA2 + i1 * sA1 + i0 * sA0])",
...@@ -1948,7 +1947,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -1948,7 +1947,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
reduce_fct = self._assign_reduce(node, nodename, "myresult", reduce_fct = self._assign_reduce(node, nodename, "myresult",
load_in + "(A[i0 * sA0 + i1 * sA1 + i2 * sA2])", load_in + "(A[i0 * sA0 + i1 * sA1 + i2 * sA2])",
{}, True) {}, True)
reduce_init = self._assign_init(load_in + "(A[i0 * sA0 + threadIdx.x * sA1 + i2 * sA2])") reduce_init = self._assign_init(load_in + "(A[i0 * sA0 + threadIdx.x * sA1 + i2 * sA2])", acc_dtype)
kname = "kernel_reduce_010" kname = "kernel_reduce_010"
k_var = "kernel_reduce_010_" + nodename k_var = "kernel_reduce_010_" + nodename
sio = StringIO() sio = StringIO()
...@@ -1995,7 +1994,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -1995,7 +1994,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
reduce_fct = self._assign_reduce(node, nodename, "myresult", reduce_fct = self._assign_reduce(node, nodename, "myresult",
load_in + "(X[a * sX0 + b * sX1 + c * sX2])", load_in + "(X[a * sX0 + b * sX1 + c * sX2])",
{}, True) {}, True)
reduce_init = self._assign_init(load_in + "(X[a * sX0 + 0 * sX1 + c * sX2])") reduce_init = self._assign_init(load_in + "(X[a * sX0 + 0 * sX1 + c * sX2])", acc_dtype)
kname = "kernel_reduce_010_AD" kname = "kernel_reduce_010_AD"
k_var = "kernel_reduce_010_AD_" + nodename k_var = "kernel_reduce_010_AD_" + nodename
sio = StringIO() sio = StringIO()
...@@ -2062,7 +2061,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2062,7 +2061,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
reduce_fct = self._assign_reduce(node, nodename, "myresult", reduce_fct = self._assign_reduce(node, nodename, "myresult",
load_in + "(A[i0 * sA0 + i1 * sA1 + i2 * sA2])", load_in + "(A[i0 * sA0 + i1 * sA1 + i2 * sA2])",
{}, True) {}, True)
reduce_init = self._assign_init(load_in + "(A[i0 * sA0 + 0 * sA1 + i2 * sA2])") reduce_init = self._assign_init(load_in + "(A[i0 * sA0 + 0 * sA1 + i2 * sA2])", acc_dtype)
sio = StringIO() sio = StringIO()
print("""#include "cluda.h" print("""#include "cluda.h"
...@@ -2096,7 +2095,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2096,7 +2095,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
reduce_fct = self._assign_reduce(node, nodename, "myresult", reduce_fct = self._assign_reduce(node, nodename, "myresult",
load_in + "(A[i0 * sA0 + i1 * sA1 + blockIdx.x * sA2])", load_in + "(A[i0 * sA0 + i1 * sA1 + blockIdx.x * sA2])",
{}, True) {}, True)
reduce_init = self._assign_init(load_in + "(A[blockIdx.x * sA2])") reduce_init = self._assign_init(load_in + "(A[blockIdx.x * sA2])", acc_dtype)
kname = "kernel_reduce_110" kname = "kernel_reduce_110"
k_var = "kernel_reduce_110_" + nodename k_var = "kernel_reduce_110_" + nodename
sio = StringIO() sio = StringIO()
...@@ -2144,7 +2143,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2144,7 +2143,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
reduce_fct = self._assign_reduce(node, nodename, "myresult", reduce_fct = self._assign_reduce(node, nodename, "myresult",
load_in + "(A[i0 * sA0 + i1 * sA1 + i2 * sA2])", load_in + "(A[i0 * sA0 + i1 * sA1 + i2 * sA2])",
{}, True) {}, True)
reduce_init = self._assign_init(load_in + "(A[i1 * sA1 + i2 * sA2])") reduce_init = self._assign_init(load_in + "(A[i1 * sA1 + i2 * sA2])", acc_dtype)
sio = StringIO() sio = StringIO()
print("""#include "cluda.h" print("""#include "cluda.h"
...@@ -2175,7 +2174,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2175,7 +2174,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
reduce_fct = self._assign_reduce(node, nodename, "myresult", reduce_fct = self._assign_reduce(node, nodename, "myresult",
load_in + "(A[i0 * sA0 + i1 * sA1 + i2 * sA2])", load_in + "(A[i0 * sA0 + i1 * sA1 + i2 * sA2])",
{}, True) {}, True)
reduce_init = self._assign_init(load_in + "(A[0])") reduce_init = self._assign_init(load_in + "(A[0])", acc_dtype)
sio = StringIO() sio = StringIO()
print("""#include "cluda.h" print("""#include "cluda.h"
...@@ -2206,7 +2205,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2206,7 +2205,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
reduce_fct = self._assign_reduce(node, nodename, "myresult", reduce_fct = self._assign_reduce(node, nodename, "myresult",
load_in + "(A[i0 * sA0 + i1 * sA1 + i2 * sA2])", load_in + "(A[i0 * sA0 + i1 * sA1 + i2 * sA2])",
{}, True) {}, True)
reduce_init = self._assign_init(load_in + "(A[i0 * sA0 + i1 * sA1])") reduce_init = self._assign_init(load_in + "(A[i0 * sA0 + i1 * sA1])", acc_dtype)
kname = "kernel_reduce_001" kname = "kernel_reduce_001"
k_var = "kernel_reduce_001_" + nodename k_var = "kernel_reduce_001_" + nodename
sio = StringIO() sio = StringIO()
...@@ -2257,7 +2256,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2257,7 +2256,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
reduce_fct = self._assign_reduce(node, nodename, "myresult", reduce_fct = self._assign_reduce(node, nodename, "myresult",
load_in + "(A[i0 * sA0 + i1 * sA1 + i2 * sA2 + i3 * sA3])", load_in + "(A[i0 * sA0 + i1 * sA1 + i2 * sA2 + i3 * sA3])",
{}, True) {}, True)
reduce_init = self._assign_init(load_in + "(A[i0 * sA0 + i1 * sA1])") reduce_init = self._assign_init(load_in + "(A[i0 * sA0 + i1 * sA1])", acc_dtype)
sio = StringIO() sio = StringIO()
print("""#include "cluda.h" print("""#include "cluda.h"
...@@ -2294,7 +2293,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2294,7 +2293,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
reduce_fct = self._assign_reduce(node, nodename, "myresult", reduce_fct = self._assign_reduce(node, nodename, "myresult",
load_in + "(A[i0 * sA0 + i1 * sA1 + i2 * sA2 + i3 * sA3])", load_in + "(A[i0 * sA0 + i1 * sA1 + i2 * sA2 + i3 * sA3])",
{}, True) {}, True)
reduce_init = self._assign_init(load_in + "(A[i0 * sA0 + i2 * sA2])") reduce_init = self._assign_init(load_in + "(A[i0 * sA0 + i2 * sA2])", acc_dtype)
sio = StringIO() sio = StringIO()
print("""#include "cluda.h" print("""#include "cluda.h"
...@@ -2329,7 +2328,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2329,7 +2328,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
reduce_fct = self._assign_reduce(node, nodename, "myresult", reduce_fct = self._assign_reduce(node, nodename, "myresult",
load_in + "(A[i0 * sA0 + i1 * sA1 + i2 * sA2 + i3 * sA3])", load_in + "(A[i0 * sA0 + i1 * sA1 + i2 * sA2 + i3 * sA3])",
{}, True) {}, True)
reduce_init = self._assign_init(load_in + "(A[0])") reduce_init = self._assign_init(load_in + "(A[0])", acc_dtype)
sio = StringIO() sio = StringIO()
print("""#include "cluda.h" print("""#include "cluda.h"
...@@ -2359,7 +2358,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2359,7 +2358,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
reduce_fct = self._assign_reduce(node, nodename, "myresult", reduce_fct = self._assign_reduce(node, nodename, "myresult",
load_in + "(A[i0 * sA0 + blockIdx.x * sA1 + i2 * sA2 + i3 * sA3])", load_in + "(A[i0 * sA0 + blockIdx.x * sA1 + i2 * sA2 + i3 * sA3])",
{}, True) {}, True)
reduce_init = self._assign_init(load_in + "(A[blockIdx.x * sA1])") reduce_init = self._assign_init(load_in + "(A[blockIdx.x * sA1])", acc_dtype)
kname = "kernel_reduce_1011" kname = "kernel_reduce_1011"
k_var = "kernel_reduce_1011_" + nodename k_var = "kernel_reduce_1011_" + nodename
sio = StringIO() sio = StringIO()
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论