Commit 34142d69 authored by nouiz

Merge pull request #997 from goodfeli/fix_elemwise_grad

Add validation of input for some ops
...@@ -873,6 +873,7 @@ def _populate_grad_dict(var_to_node_to_idx, ...@@ -873,6 +873,7 @@ def _populate_grad_dict(var_to_node_to_idx,
# populate grad_dict[var] and return it # populate grad_dict[var] and return it
def access_grad_cache(var): def access_grad_cache(var):
if var not in grad_dict: if var not in grad_dict:
# If var is not in grad_dict already, we must compute it
if var in var_to_node_to_idx: if var in var_to_node_to_idx:
terms = [] terms = []
node_to_idx = var_to_node_to_idx[var] node_to_idx = var_to_node_to_idx[var]
...@@ -895,6 +896,11 @@ def _populate_grad_dict(var_to_node_to_idx, ...@@ -895,6 +896,11 @@ def _populate_grad_dict(var_to_node_to_idx,
if isinstance(term.type, DisconnectedType): if isinstance(term.type, DisconnectedType):
continue continue
if hasattr(var,'ndim') and term.ndim != var.ndim:
raise ValueError(("%s.grad returned a term with"
" %d dimensions, but %d are required.") % (
str(node.op), term.ndim, var.ndim))
terms.append(term) terms.append(term)
# Add up the terms to get the total gradient on this variable # Add up the terms to get the total gradient on this variable
...@@ -911,6 +917,7 @@ def _populate_grad_dict(var_to_node_to_idx, ...@@ -911,6 +917,7 @@ def _populate_grad_dict(var_to_node_to_idx,
# this variable isn't connected to the cost in the computational # this variable isn't connected to the cost in the computational
# graph # graph
grad_dict[var] = DisconnectedType()() grad_dict[var] = DisconnectedType()()
# end if cache miss
return grad_dict[var] return grad_dict[var]
rval = [access_grad_cache(elem) for elem in wrt] rval = [access_grad_cache(elem) for elem in wrt]
......
...@@ -4441,6 +4441,10 @@ class IncSubtensor(Op): ...@@ -4441,6 +4441,10 @@ class IncSubtensor(Op):
def make_node(self, x, y, *inputs): def make_node(self, x, y, *inputs):
x, y = map(as_tensor_variable, [x, y]) x, y = map(as_tensor_variable, [x, y])
if y.ndim > x.ndim:
raise ValueError(("Trying to increment a %d-dimensional "
"subtensor with a %d-dimensional value.") % (x.ndim,
y.ndim))
inputs = tuple(map(Subtensor.my_as_scalar, inputs)) inputs = tuple(map(Subtensor.my_as_scalar, inputs))
idx_list = list(self.idx_list) idx_list = list(self.idx_list)
......
...@@ -101,6 +101,8 @@ class DimShuffle(Op): ...@@ -101,6 +101,8 @@ class DimShuffle(Op):
- new_order: a list representing the relationship between the - new_order: a list representing the relationship between the
input's dimensions and the output's dimensions. Each input's dimensions and the output's dimensions. Each
element of the list can either be an index or 'x'. element of the list can either be an index or 'x'.
Indices must be encoded as python integers, not
theano symbolic integers.
- inplace: if True, the output will be a view of the input. - inplace: if True, the output will be a view of the input.
If False, the output will be a copy of the input. If False, the output will be a copy of the input.
...@@ -119,10 +121,17 @@ class DimShuffle(Op): ...@@ -119,10 +121,17 @@ class DimShuffle(Op):
self.new_order = new_order self.new_order = new_order
self.inplace = inplace self.inplace = inplace
for i in xrange(len(new_order) - 1): for i, j in enumerate(new_order):
j = new_order[i] if j != 'x':
if j != 'x' and j in new_order[(i + 1):]: if not isinstance(j, int):
raise ValueError(( raise TypeError(
"DimShuffle indices must be python ints.")
if j >= len(input_broadcastable):
raise ValueError(("new_order[%d] is %d, but the input "
"only has %d axes.") %
(i,j,len(input_broadcastable)))
if j in new_order[(i + 1):]:
raise ValueError((
"The same input dimension may not appear twice in the " "The same input dimension may not appear twice in the "
"list of output dimensions", (new_order))) "list of output dimensions", (new_order)))
...@@ -379,7 +388,7 @@ PyArray_SetBaseObject(%(res)s, (PyObject*)%(basename)s); ...@@ -379,7 +388,7 @@ PyArray_SetBaseObject(%(res)s, (PyObject*)%(basename)s);
if v != 'x': if v != 'x':
grad_order[v] = i grad_order[v] = i
# Do not make the DimShuffle inplace as an optimization at the # Do not make the DimShuffle inplace as an optimization at the
# canonicalization optimization phase will remove the implace. # canonicalization optimization phase will remove the inplace.
# The inplace will be reintroduced automatically later in the graph. # The inplace will be reintroduced automatically later in the graph.
return [DimShuffle(gz.type.broadcastable, grad_order)( return [DimShuffle(gz.type.broadcastable, grad_order)(
Elemwise(scalar.identity)(gz))] Elemwise(scalar.identity)(gz))]
......
Markdown format supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment