提交 2a1aae56 作者: Ian Goodfellow

Renamed GpuSum to GpuCAReduce and marked the broken methods as NotImplemented
上级 18be4e51
...@@ -484,15 +484,15 @@ class GpuDimShuffle(GpuOp): ...@@ -484,15 +484,15 @@ class GpuDimShuffle(GpuOp):
return (1, 0) return (1, 0)
class GpuSum(GpuOp): class GpuCAReduce(GpuOp):
"""GpuSum is a Reduction along some dimensions by summation. """GpuCAReduce is a Reduction along some dimensions by a scalar op.
The dimensions along which to sum is specified by the The dimensions along which to reduce is specified by the
`reduce_mask` that you pass to the constructor. The `reduce_mask` `reduce_mask` that you pass to the constructor. The `reduce_mask`
is a tuple of booleans (actually integers 0 or 1) that specify for is a tuple of booleans (actually integers 0 or 1) that specify for
each input dimension, whether to reduce it (1) or not (0). each input dimension, whether to reduce it (1) or not (0).
For example: For example, when scalar_op is theano.scalar.basic.Add:
- reduce_mask == (1,) sums a vector to a scalar - reduce_mask == (1,) sums a vector to a scalar
...@@ -506,7 +506,7 @@ class GpuSum(GpuOp): ...@@ -506,7 +506,7 @@ class GpuSum(GpuOp):
be removed during graph optimization be removed during graph optimization
""" """
def __init__(self, reduce_mask): def __init__(self, reduce_mask, scalar_op):
self.reduce_mask = tuple(reduce_mask) self.reduce_mask = tuple(reduce_mask)
def __eq__(self, other): def __eq__(self, other):
...@@ -517,7 +517,10 @@ class GpuSum(GpuOp): ...@@ -517,7 +517,10 @@ class GpuSum(GpuOp):
return hash(type(self)) ^ hash(self.reduce_mask) return hash(type(self)) ^ hash(self.reduce_mask)
def __str__(self): def __str__(self):
return "GpuSum{%s}" % ','.join(str(i) for i in self.reduce_mask) return "GpuCAReduce{%s}{%s}" % (
str(self.scalar_op),
','.join(str(i) for i in self.reduce_mask)
)
def make_node(self, x): def make_node(self, x):
if (x.type.ndim != len(self.reduce_mask)): if (x.type.ndim != len(self.reduce_mask)):
...@@ -529,9 +532,11 @@ class GpuSum(GpuOp): ...@@ -529,9 +532,11 @@ class GpuSum(GpuOp):
def perform(self, node, inp, out): def perform(self, node, inp, out):
x, = inp x, = inp
z, = out z, = out
raise NotImplementedError() # TODO
z[0] = x.reduce_sum(self.reduce_mask) z[0] = x.reduce_sum(self.reduce_mask)
def c_code(self, node, name, inp, out, sub): def c_code(self, node, name, inp, out, sub):
raise NotImplementedError()
x, = inp x, = inp
z, = out z, = out
...@@ -627,6 +632,7 @@ class GpuSum(GpuOp): ...@@ -627,6 +632,7 @@ class GpuSum(GpuOp):
return sio.getvalue() return sio.getvalue()
def _makecall(self, node, name, x, z, fail, pattern=None): def _makecall(self, node, name, x, z, fail, pattern=None):
raise NotImplementedError()
"""Return a string for making a kernel call. """Return a string for making a kernel call.
The return value looks something like: The return value looks something like:
...@@ -713,6 +719,7 @@ class GpuSum(GpuOp): ...@@ -713,6 +719,7 @@ class GpuSum(GpuOp):
def _k_decl(self, node, nodename, pattern=None, def _k_decl(self, node, nodename, pattern=None,
ndim=None, reduce_mask=None): ndim=None, reduce_mask=None):
raise NotImplementedError()
"""Return a string to declare a kernel function """Return a string to declare a kernel function
.. code-block:: c .. code-block:: c
...@@ -762,6 +769,7 @@ class GpuSum(GpuOp): ...@@ -762,6 +769,7 @@ class GpuSum(GpuOp):
return sio.getvalue() return sio.getvalue()
def _k_init(self, *args): def _k_init(self, *args):
raise NotImplementedError()
return """ return """
const int threadCount = blockDim.x * blockDim.y * blockDim.z; const int threadCount = blockDim.x * blockDim.y * blockDim.z;
const int threadNum = threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x; const int threadNum = threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x;
...@@ -778,6 +786,7 @@ class GpuSum(GpuOp): ...@@ -778,6 +786,7 @@ class GpuSum(GpuOp):
""" """
def _k_reduce_buf(self, z_pos): def _k_reduce_buf(self, z_pos):
raise NotImplementedError()
# Work with all nvidia driver # Work with all nvidia driver
# But only for power or multiple of 2! # But only for power or multiple of 2!
new_version = """ new_version = """
...@@ -1021,6 +1030,7 @@ class GpuSum(GpuOp): ...@@ -1021,6 +1030,7 @@ class GpuSum(GpuOp):
self.c_code_reduce_01X(sio, node, name, x, z, fail, 3) self.c_code_reduce_01X(sio, node, name, x, z, fail, 3)
def c_code_reduce_10(self, sio, node, name, x, z, fail): def c_code_reduce_10(self, sio, node, name, x, z, fail):
raise NotImplementedError()
print >> sio, """ print >> sio, """
{ {
int verbose = 0; int verbose = 0;
...@@ -1070,6 +1080,7 @@ class GpuSum(GpuOp): ...@@ -1070,6 +1080,7 @@ class GpuSum(GpuOp):
""" % locals() """ % locals()
def c_code_reduce_010(self, sio, node, name, x, z, fail): def c_code_reduce_010(self, sio, node, name, x, z, fail):
raise NotImplementedError()
makecall = self._makecall(node, name, x, z, fail) makecall = self._makecall(node, name, x, z, fail)
makecall_inner = self._makecall(node, name, x, z, fail, makecall_inner = self._makecall(node, name, x, z, fail,
pattern="010_inner") pattern="010_inner")
...@@ -1192,6 +1203,7 @@ class GpuSum(GpuOp): ...@@ -1192,6 +1203,7 @@ class GpuSum(GpuOp):
""" % locals() """ % locals()
def c_code_reduce_0101(self, sio, node, name, x, z, fail): def c_code_reduce_0101(self, sio, node, name, x, z, fail):
raise NotImplementedError()
makecall = self._makecall(node, name, x, z, fail) makecall = self._makecall(node, name, x, z, fail)
print >> sio, """ print >> sio, """
{ {
...@@ -1211,6 +1223,7 @@ class GpuSum(GpuOp): ...@@ -1211,6 +1223,7 @@ class GpuSum(GpuOp):
""" % locals() """ % locals()
def c_code_reduce_100(self, sio, node, name, x, z, fail): def c_code_reduce_100(self, sio, node, name, x, z, fail):
raise NotImplementedError()
makecall = self._makecall(node, name, x, z, fail) makecall = self._makecall(node, name, x, z, fail)
# use threadIdx.x for i0 # use threadIdx.x for i0
# use blockIdx.x for i1 # use blockIdx.x for i1
...@@ -1231,6 +1244,7 @@ class GpuSum(GpuOp): ...@@ -1231,6 +1244,7 @@ class GpuSum(GpuOp):
""" % locals() """ % locals()
def c_code_reduce_110(self, sio, node, name, x, z, fail): def c_code_reduce_110(self, sio, node, name, x, z, fail):
raise NotImplementedError()
makecall = self._makecall(node, name, x, z, fail) makecall = self._makecall(node, name, x, z, fail)
print >> sio, """ print >> sio, """
{ {
...@@ -1252,6 +1266,7 @@ class GpuSum(GpuOp): ...@@ -1252,6 +1266,7 @@ class GpuSum(GpuOp):
""" % locals() """ % locals()
def c_code_reduce_001(self, sio, node, name, x, z, fail): def c_code_reduce_001(self, sio, node, name, x, z, fail):
raise NotImplementedError()
makecall = self._makecall(node, name, x, z, fail) makecall = self._makecall(node, name, x, z, fail)
print >> sio, """ print >> sio, """
{ {
...@@ -1274,6 +1289,7 @@ class GpuSum(GpuOp): ...@@ -1274,6 +1289,7 @@ class GpuSum(GpuOp):
""" % locals() """ % locals()
def c_code_reduce_111(self, sio, node, name, x, z, fail): def c_code_reduce_111(self, sio, node, name, x, z, fail):
raise NotImplementedError()
makecall = self._makecall(node, name, x, z, fail) makecall = self._makecall(node, name, x, z, fail)
print >> sio, """ print >> sio, """
{ {
...@@ -1306,6 +1322,7 @@ class GpuSum(GpuOp): ...@@ -1306,6 +1322,7 @@ class GpuSum(GpuOp):
""" % locals() """ % locals()
def c_code_reduce_0011(self, sio, node, name, x, z, fail): def c_code_reduce_0011(self, sio, node, name, x, z, fail):
raise NotImplementedError()
makecall = self._makecall(node, name, x, z, fail) makecall = self._makecall(node, name, x, z, fail)
print >> sio, """ print >> sio, """
{ {
...@@ -1336,6 +1353,7 @@ class GpuSum(GpuOp): ...@@ -1336,6 +1353,7 @@ class GpuSum(GpuOp):
""" % locals() """ % locals()
def c_code_reduce_1111(self, sio, node, name, x, z, fail): def c_code_reduce_1111(self, sio, node, name, x, z, fail):
raise NotImplementedError()
makecall = self._makecall(node, name, x, z, fail) makecall = self._makecall(node, name, x, z, fail)
print >> sio, """ print >> sio, """
{ {
...@@ -1368,6 +1386,7 @@ class GpuSum(GpuOp): ...@@ -1368,6 +1386,7 @@ class GpuSum(GpuOp):
""" % locals() """ % locals()
def c_code_reduce_1011(self, sio, node, name, x, z, fail): def c_code_reduce_1011(self, sio, node, name, x, z, fail):
raise NotImplementedError()
makecall = self._makecall(node, name, x, z, fail) makecall = self._makecall(node, name, x, z, fail)
print >> sio, """ print >> sio, """
{ {
...@@ -1392,9 +1411,10 @@ class GpuSum(GpuOp): ...@@ -1392,9 +1411,10 @@ class GpuSum(GpuOp):
""" % locals() """ % locals()
def c_code_cache_version(self): def c_code_cache_version(self):
return (22,) return (1,)
def c_support_code_apply(self, node, nodename): def c_support_code_apply(self, node, nodename):
raise NotImplementedError()
sio = StringIO.StringIO() sio = StringIO.StringIO()
nd_in = len(self.reduce_mask) nd_in = len(self.reduce_mask)
if all(i == 1 for i in self.reduce_mask): if all(i == 1 for i in self.reduce_mask):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论