提交 95725506 authored 作者: Arnaud Bergeron

Replace Alloc by a shell around the tensor one.

上级 3f9dc6c6
...@@ -6,6 +6,7 @@ import theano ...@@ -6,6 +6,7 @@ import theano
from theano import Op, Type, Apply, Variable, Constant from theano import Op, Type, Apply, Variable, Constant
from theano import tensor, scalar, config from theano import tensor, scalar, config
from theano.scalar import Scalar from theano.scalar import Scalar
from theano.tensor.basic import Alloc
from theano.gof.python25 import all, any from theano.gof.python25 import all, any
from theano.gof.utils import MethodNotDefined from theano.gof.utils import MethodNotDefined
...@@ -473,55 +474,30 @@ class CudaFromGpu(Op): ...@@ -473,55 +474,30 @@ class CudaFromGpu(Op):
cuda_from_gpu = CudaFromGpu() cuda_from_gpu = CudaFromGpu()
class GpuAlloc(HideC, Alloc):
    """Allocate a GPU array of a given shape and fill it with `value`.

    GPU counterpart of `theano.tensor.basic.Alloc`: graph-level behaviour
    (shape/dtype validation, broadcastable pattern, infer_shape, grad,
    constant folding) is inherited from `Alloc`, so this Op is only a thin
    shell that swaps the input/output types for GPU ones and provides a
    GPU `perform`.  `HideC` masks the C implementation inherited from
    `Alloc`, which would be invalid for GPU arrays.
    """

    def __str__(self):
        return 'GpuAlloc'

    def make_node(self, value, *shape):
        # Let the tensor-level Alloc validate `value` and `shape` and
        # compute the output dtype/broadcastable pattern, then rebuild the
        # Apply node with a GPU `value` input and a GpuArrayType output.
        res = Alloc.make_node(self, value, *shape)
        value = as_gpuarray_variable(value)
        otype = GpuArrayType(dtype=res.outputs[0].dtype,
                             broadcastable=res.outputs[0].broadcastable)
        # res.inputs[1:] are the (already canonicalized) shape inputs.
        return Apply(self, [value] + res.inputs[1:], [otype()])

    def perform(self, node, inputs, outs):
        out, = outs
        v = inputs[0]
        sh = tuple(map(int, inputs[1:]))
        if out[0] is None or out[0].shape != sh:
            # No reusable output buffer of the right shape: allocate one.
            if v.size == 1 and numpy.asarray(v)[0].item() == 0:
                # Fast path: filling with a scalar zero can be done by
                # zeros() in a single call, skipping the broadcast-assign.
                out[0] = gpuarray.zeros(sh, dtype=v.dtype)
            else:
                out[0] = gpuarray.empty(sh, dtype=v.dtype)
                out[0][...] = v  # broadcast `v` over the fresh buffer
        else:
            # Shape matches: refill the previously allocated buffer
            # in place instead of reallocating.
            out[0][...] = v
        if config.gpuarray.sync:
            # Optional synchronous mode (config flag): wait for the GPU
            # work to finish before returning.
            out[0].sync()

gpu_alloc = GpuAlloc()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论