Commit f25eae1f authored by f0k

Cleaner inheritance for GpuAlloc.make_node()

Parent db74811a
...@@ -3261,9 +3261,9 @@ class GpuAllocEmpty(GpuOp): ...@@ -3261,9 +3261,9 @@ class GpuAllocEmpty(GpuOp):
"""Implement Alloc on the gpu, but without initializing memory.""" """Implement Alloc on the gpu, but without initializing memory."""
__props__ = () __props__ = ()
def make_node(self, *shape): @staticmethod
def validate_shape(shape):
sh = [tensor.as_tensor_variable(s) for s in shape] sh = [tensor.as_tensor_variable(s) for s in shape]
bcast = [] bcast = []
for s in sh: for s in sh:
if s.type.dtype[:3] not in ('int', 'uin'): if s.type.dtype[:3] not in ('int', 'uin'):
...@@ -3276,8 +3276,12 @@ class GpuAllocEmpty(GpuOp): ...@@ -3276,8 +3276,12 @@ class GpuAllocEmpty(GpuOp):
bcast.append(numpy.all(1 == const_shp)) bcast.append(numpy.all(1 == const_shp))
otype = CudaNdarrayType(dtype='float32', broadcastable=bcast) otype = CudaNdarrayType(dtype='float32', broadcastable=bcast)
output = otype() output = otype()
return sh, output
def make_node(self, *shape):
    """Build an Apply node allocating an *uninitialized* float32
    CudaNdarray with the given symbolic shape.

    ``shape`` is one symbolic scalar per output dimension; validation and
    output-type construction are delegated to ``validate_shape``.
    """
    sh, out = self.validate_shape(shape)
    # The memory is left uninitialized, so comparing output values is
    # meaningless — make every approximate comparison succeed.
    out.values_eq_approx = tensor.type.values_eq_approx_always_true
    return Apply(self, sh, [out])
def perform(self, node, inputs, out_): def perform(self, node, inputs, out_):
out, = out_ out, = out_
...@@ -3324,7 +3328,6 @@ class GpuAllocEmpty(GpuOp): ...@@ -3324,7 +3328,6 @@ class GpuAllocEmpty(GpuOp):
return (1,) return (1,)
def do_constant_folding(self, node):
    """Never constant-fold this op.

    The output memory is deliberately uninitialized, so folding it into a
    constant would freeze arbitrary garbage values into the graph.
    """
    return False
# Shared module-level instance of the op (stateless: __props__ is empty).
gpu_alloc_empty = GpuAllocEmpty()
...@@ -3351,14 +3354,13 @@ class GpuAlloc(GpuAllocEmpty): ...@@ -3351,14 +3354,13 @@ class GpuAlloc(GpuAllocEmpty):
return s return s
def make_node(self, value, *shape):
    """Build an Apply node allocating a CudaNdarray of ``shape`` filled
    with (a broadcast of) ``value``.

    If an unneeded host->GPU transfer is generated by
    ``as_cuda_ndarray_variable``, the optimizer will remove it.
    """
    v = as_cuda_ndarray_variable(value)
    if v.ndim != len(shape):
        # Left-pad `value` with broadcastable dimensions so it can be
        # broadcast against the requested shape.
        # BUG FIX: previously the padded result was assigned to `value`
        # and then discarded — `v` (built from the unpadded value) was
        # used below, so the padding had no effect. Re-wrap the padded
        # variable so it actually reaches the Apply node.
        # NOTE(review): assumes v.ndim <= len(shape) — confirm callers;
        # a larger ndim raised inside shape_padleft before this change too.
        v = as_cuda_ndarray_variable(
            tensor.shape_padleft(value, len(shape) - v.ndim))
    shape, output = self.validate_shape(shape)
    return Apply(self, [v] + shape, [output])
def perform(self, node, inputs, out_): def perform(self, node, inputs, out_):
# the super class (GpuAllocEmpty) allocates memory, we fill it # the super class (GpuAllocEmpty) allocates memory, we fill it
......
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment