Commit c981568e, authored by Ian Goodfellow

updated verify_grad to not use constants, compile function only once per call

Parent: b2c924cd
@@ -10,7 +10,7 @@ import traceback #for overriding Op.__call__
 import numpy, theano
 #from copy import copy as python_copy
-from theano import gof
+from theano import gof, shared
 from theano.gof import Variable, Op, utils, Type, Constant, Value
 from theano.tensor.tsor_apply import Apply
@@ -4099,9 +4099,7 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None, rel_tol=None):
         f = compile.function(inputs, output, accept_inplace=True, mode=mode)
         return f
-    for test_num in xrange(n_tests):
-        tensor_pt = [value(p.copy(), name='input %i'%i) for i,p in enumerate(pt)]
+    tensor_pt = [TensorType(as_tensor_variable(p).dtype, as_tensor_variable(p).broadcastable)(name='input %i'%i) for i,p in enumerate(pt)]
     #fun can be either a function or an actual Op instance
     o_output = fun(*tensor_pt)
@@ -4114,6 +4112,7 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None, rel_tol=None):
     o_fn = function(tensor_pt, o_output)
     o_fn_out = o_fn(*[p.copy() for p in pt])
     if isinstance(o_fn_out, tuple) or isinstance(o_fn_out, list):
         raise TypeError('It seems like you are trying to use verify_grad '
                 'on an op or a function which outputs a list: there should'
@@ -4121,28 +4120,29 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None, rel_tol=None):
     # random_projection should not have elements too small,
     # otherwise too much precision is lost in numerical gradient
-    random_projection = rng.rand(*o_fn_out.shape) + 0.5
-    if cast_to_output_type:
-        random_projection = numpy.array(random_projection,
-                dtype=o_output.dtype)
-    t_r = as_tensor_variable(random_projection)
+    def random_projection():
+        plain = rng.rand(*o_fn_out.shape) + 0.5
+        if cast_to_output_type:
+            return numpy.array(plain,o_output.dtype)
+        return plain
+    t_r = shared(random_projection())
     #random projection of o onto t_r
     cost = sum(t_r * o_output) #This sum() is defined above, it's not the builtin sum.
     cost_fn = function(tensor_pt, cost)
-    num_grad = numeric_grad(cost_fn, [p.copy() for p in pt], eps)
+    #todo-- determine if this is actually needed
     g_cost = as_tensor_variable(1.0,name='g_cost')
     if cast_to_output_type:
         g_cost = cast(g_cost, o_output.dtype)
     symbolic_grad = grad(cost, tensor_pt, g_cost)
     grad_fn = function(tensor_pt, symbolic_grad)
+    for test_num in xrange(n_tests):
+        num_grad = numeric_grad(cost_fn, [p.copy() for p in pt], eps)
     analytic_grad = grad_fn(*[p.copy() for p in pt])
     if not isinstance(analytic_grad, (list, tuple)):
@@ -4155,6 +4155,10 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None, rel_tol=None):
         raise verify_grad.E_grad(max_arg, max_err_pos,
                 max_abs_err, max_rel_err, abs_tol, rel_tol)
+        #get new random projection for next test
+        if test_num < n_tests - 1:
+            t_r.value = random_projection()
 class GradientError(Exception):
     """This error is raised when a gradient is calculated, but incorrect."""
 ...
Markdown is supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment