提交 9d55e60f authored 作者: James Bergstra

Various modifs to make Xent tests pass with new ShapeFeature.

上级 c6fc7c59
......@@ -6,7 +6,7 @@
from theano import gof
from theano import printing
from theano.tensor import basic as tensor
from theano.tensor import elemwise
from theano.tensor import elemwise, dmatrix, fmatrix, dvector, fvector
from theano.tensor import opt
from theano.compile import optdb
import numpy
......@@ -919,6 +919,15 @@ def _check_rows_is_arange_len_labels(rows, labels):
shape_of = stop.owner.env.shape_feature.shape_of
return shape_of[labels][0] is stop
def _is_const(z, val, approx=False):
    """Return True iff the theano variable `z` folds to the constant `val`.

    `opt.get_constant_value` raises TypeError when `z` is not a
    constant expression, in which case the answer is False.  With
    `approx` set, the comparison uses numpy.allclose instead of exact
    element-wise equality.
    """
    try:
        constant = opt.get_constant_value(z)
    except TypeError:
        # z is not constant at all.
        return False
    if approx:
        return numpy.allclose(constant, val)
    return numpy.all(constant == val)
@opt.register_specialize
@gof.local_optimizer([])
def local_advanced_indexing_crossentropy_onehot(node):
......@@ -969,7 +978,7 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
except:
return
if sm is not None and sm.owner and sm.owner.op in (softmax, softmax_with_bias):
if (sm is not None) and sm.owner and (sm.owner.op in (softmax, softmax_with_bias)):
sm_w_bias = local_softmax_with_bias.transform(sm.owner)
if sm_w_bias:
assert sm_w_bias[0].owner.op == softmax_with_bias
......@@ -1023,13 +1032,7 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
return
# Check that z == zeros_like(softmax(x))
if z.owner and z.owner.op == tensor.fill:
model, value = z.owner.inputs
if not (model is sm and hasattr(value, 'data') and numpy.all(value.data == 0)):
return
#else: OK
else:
if not _is_const(z, 0):
return
# In the base case (output gradient = 1), incr is -1./sm[arange(len(y)), y]
......@@ -1112,11 +1115,17 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
# Second case
elif out_grad.owner and out_grad.owner.op == tensor.true_div:
# we know
# we're looking for
# AdvIncSubtensor(zeros, grad_nll, arange(len(y)), y) / softmax
try:
num, denom = out_grad.owner.inputs
except:
return
if denom != sm:
return
# Check the numerator (AdvancedIncSubtensor)
if num.owner and isinstance(num.owner.op, tensor.AdvancedIncSubtensor):
try:
......@@ -1125,6 +1134,14 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
return
# Check z is zeros_like(log(sm))
# JB - do we really care if this is zeros?
if not _is_const(z, 0):
return
if z.type not in (dmatrix, fmatrix):
return
# here we know that we are incrementing a matrix of zeros
if 0:
if z.owner and z.owner.op == tensor.fill:
model, value = z.owner.inputs
......@@ -1142,6 +1159,21 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
else:
return
if incr.type not in (dvector, fvector):
return
# here we know that we are incrementing some part of matrix z by a vector
# unless the user has taken care to mark that the data and labels have the
# same number of rows, we cannot be sure here that
# len(y) == len(z)
# However, in the common case that these are predictions and labels it is true.
# We leave it to the Op to crash (and the user to complain) if this assumption is
# ever not true.
outgrad_factor = None
if 0:
# Check incr is ((-1.) like log(softmax(x))[arange(len(y)), y])
if incr.owner and incr.owner.op == tensor.fill:
model, value = incr.owner.inputs
......@@ -1189,10 +1221,7 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
return
# else, arguments of AdvancedIncSubtensor are OK
# Check the denominator (sm)
if not denom is sm:
return
return [crossentropy_softmax_1hot_with_bias_dx(-incr, sm, labels)]
# else, numerator and denominator are OK,
# it was really case 2.
......
......@@ -306,14 +306,22 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
# Verify the optimizer worked on the expressions
f = theano.function([x,y], expr, mode=mode)
if verbose: print_graph(f)
try:
assert len(f.maker.env.toposort()) == 4
f(x_val, y_val)
except:
theano.printing.debugprint(f)
raise
# Also verify the gradient wrt x
g = theano.function([x,y], T.grad(expr, x), mode=mode)
if verbose: print_graph(g)
try:
assert len(g.maker.env.toposort()) == 4
g(x_val, y_val)
except:
theano.printing.debugprint(g)
raise
## Test that a biased softmax is optimized correctly
......@@ -326,13 +334,21 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
for expr in bias_expressions:
f = theano.function([x,b,y], expr, mode=mode)
if verbose: print_graph(f)
try:
assert len(f.maker.env.toposort()) == 2 # [big_op, sum]
f(x_val, b_val, y_val)
except:
theano.printing.debugprint(f)
raise
g = theano.function([x,b,y], T.grad(expr, x), mode=mode)
if verbose: print_graph(g)
try:
assert len(g.maker.env.toposort()) == 4
g(x_val, b_val, y_val)
except:
theano.printing.debugprint(g)
raise
## Test that using "mean" instead of sum works, too
mean_expressions = [
......@@ -344,13 +360,22 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
for expr in mean_expressions:
f = theano.function([x,y], expr, mode=mode)
if verbose: print_graph(f)
assert len(f.maker.env.toposort()) == 7
try:
assert len(f.maker.env.toposort()) == 6
f(x_val, y_val)
except:
theano.printing.debugprint(f)
raise
g = theano.function([x,y], T.grad(expr, x), mode=mode)
if verbose: print_graph(g)
assert len(g.maker.env.toposort()) == 8
try:
assert len(g.maker.env.toposort()) in (6,7) #there's an extra dimshuffle in there
# but I can't think of a good rule to get rid of it
g(x_val, y_val)
except:
theano.printing.debugprint(g)
raise
mean_bias_expressions = [
T.mean(-T.log(softmax(x+b)[T.arange(y.shape[0]), y])),
......@@ -361,12 +386,20 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
for expr in mean_bias_expressions:
f = theano.function([x,b,y], expr, mode=mode)
if verbose: print_graph(f)
assert len(f.maker.env.toposort()) == 5
try:
assert len(f.maker.env.toposort()) == 4
except:
theano.printing.debugprint(f)
raise
g = theano.function([x,b,y], T.grad(expr, x), mode=mode)
if verbose: print_graph(g)
assert len(g.maker.env.toposort()) == 8
try:
assert len(g.maker.env.toposort()) in (6,7)
g(x_val, b_val, y_val)
except:
theano.printing.debugprint(g)
raise
def test_scale_cost(self):
......@@ -450,21 +483,33 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
for expr in expressions:
# Verify the optimizer worked on the expressions
f = theano.function([x,y,a], expr, mode=mode)
try:
assert 5 <= len(f.maker.env.toposort()) <= 10
validate_fn_graph(f)
f(x_val, y_val, 0.1)
except:
theano.printing.debugprint(f)
raise
# Verify the gradient wrt x
g = theano.function([x,y,a], T.grad(expr, x), mode=mode)
try:
assert 5 <= len(g.maker.env.toposort()) <= 12
validate_grad_graph(g)
g(x_val, y_val, 0.1)
except:
theano.printing.debugprint(g)
raise
# Verify the gradient when providing output gradient
h = theano.function([x,y,a], T.grad(expr, x, g_cost=a*x.sum()), mode=mode)
try:
assert 8 <= len(h.maker.env.toposort()) <= 17
validate_grad_graph(h)
h(x_val, y_val, 0.1)
except:
theano.printing.debugprint(h)
raise
def test_argmax_pushdown():
......
......@@ -80,9 +80,12 @@ def get_constant_value(v):
return v.data
except:
raise TypeError(v)
if v.owner and isinstance(v.owner.op, T.DimShuffle):
if v.owner:
if isinstance(v.owner.op, T.Alloc):
return get_constant_value(v.owner.inputs[0])
if v.owner and v.owner.op == T.fill:
if isinstance(v.owner.op, T.DimShuffle):
return get_constant_value(v.owner.inputs[0])
if v.owner.op == T.fill:
shape, val = v.owner.inputs
# fill(a,b) fills the shape of 'a' filled with 'b'
return get_constant_value(val)
......@@ -530,6 +533,20 @@ def local_subtensor_make_vector(node):
_logger.error('failed to index with "%s"' % str(idx))
raise
@register_specialize
@gof.local_optimizer([T.Alloc])
def local_alloc_unary(node):
    """unary(alloc(x, shp)) -> alloc(unary(x), shp)

    Push a one-input Elemwise op inside an Alloc, so the op is applied
    to the small pre-broadcast value once instead of to every element
    of the allocated array.
    """
    # Only rewrite single-input Elemwise nodes whose input comes from Alloc.
    if isinstance(node.op, T.Elemwise) and len(node.inputs)==1:
        x = node.inputs[0]
        if x.owner and isinstance(x.owner.op, T.Alloc):
            # Cast the pre-broadcast value to the Alloc input's dtype,
            # apply the op, then re-Alloc with the original shape
            # arguments (x.owner.inputs[1:]).
            # NOTE(review): assumes T.Alloc takes a dtype at construction
            # and produces an Op callable on (value, *shape) -- confirm
            # against the Alloc Op definition in theano.tensor.basic.
            return [T.Alloc(node.outputs[0].dtype)(
                node.op(T.cast(x.owner.inputs[0], x.dtype)),
                *x.owner.inputs[1:]
                )]
##################
# Subtensor opts #
##################
......
Markdown 格式
0%
您将 0 人添加到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论