提交 6b16e20c,作者:James Bergstra

merge

import time
import numpy
import theano
# Micro-benchmark: for vectors of increasing length, time one call that
# stores a float32 vector into a Theano shared variable (f1) and one call
# that reads the shared variable back through host_from_gpu (f2).
# NOTE(review): host_from_gpu is applied to `x`, so this presumably expects
# Theano to be configured with a GPU device -- confirm before running.
y = theano.tensor.fvector()
x = theano.shared(numpy.zeros(1, dtype='float32'))

# f1: takes a float32 vector and assigns it to `x` via `updates`; no outputs.
f1 = theano.function([y], updates={x: y})
# f2: takes no inputs and returns host_from_gpu(x).
f2 = theano.function([], theano.sandbox.cuda.host_from_gpu(x))

# Show the compiled graphs so the ops actually being timed are visible.
# A parenthesised single-argument print behaves the same on Python 2 and 3.
print(f1.maker.env.toposort())
print(f2.maker.env.toposort())

for i in [1, 10, 100, 1000, 10000, 100000, 1000000, 10000000]:
    o = numpy.zeros(i, dtype='float32')

    # Time a single update of the shared variable with an i-element vector.
    t0 = time.time()
    f1(o)
    t1 = time.time()
    tf1 = t1 - t0

    # Time a single read-back of the shared variable.
    t0 = time.time()
    f2()
    t1 = time.time()
    tf2 = t1 - t0

    # time.time() returns seconds; * 1e6 converts to microseconds, so the
    # unit label is "us" (the previous "ns" label was off by a factor 1000).
    print("%8i %6.1f us %7.1f us" % (i, tf1 * 1e6, tf2 * 1e6))
...@@ -815,11 +815,11 @@ class MRG_RandomStreams(object): ...@@ -815,11 +815,11 @@ class MRG_RandomStreams(object):
else: else:
final_samples = normal_samples[:prod(size)] final_samples = normal_samples[:prod(size)]
final_samples = avg + std * final_samples
if size: if size:
final_samples = final_samples.reshape(size) final_samples = final_samples.reshape(size)
final_samples = avg + std * final_samples
return final_samples return final_samples
@local_optimizer([None]) @local_optimizer([None])
......
...@@ -294,21 +294,29 @@ def basictest(f, steps, sample_size, prefix="", allow_01=False, inputs=[], ...@@ -294,21 +294,29 @@ def basictest(f, steps, sample_size, prefix="", allow_01=False, inputs=[],
ival = numpy.asarray(ival) ival = numpy.asarray(ival)
if i == 0: if i == 0:
mean = numpy.array(ival, copy=True) mean = numpy.array(ival, copy=True)
avg_std = numpy.std(ival) #avg_std = numpy.std(ival)
avg_std = numpy.sqrt(numpy.mean((ival - target_avg)**2))
min_ = ival.min() min_ = ival.min()
max_ = ival.max() max_ = ival.max()
else: else:
alpha = 1.0 / (1+i) alpha = 1.0 / (1+i)
mean = alpha * ival + (1-alpha)*mean mean = alpha * ival + (1-alpha)*mean
avg_std = alpha * numpy.std(ival) + (1-alpha)*avg_std #avg_std = alpha * numpy.std(ival) + (1-alpha)*avg_std
avg_std = alpha * numpy.sqrt(numpy.mean((ival - target_avg)**2)) + (1-alpha)*avg_std
min_ = min(min_,ival.min()) min_ = min(min_,ival.min())
max_ = max(max_,ival.max()) max_ = max(max_,ival.max())
if not allow_01: if not allow_01:
assert min_ > 0 assert min_ > 0
assert max_ < 1 assert max_ < 1
print prefix, 'mean', numpy.mean(mean) if hasattr(target_avg, 'shape'): # looks if target_avg is an array
assert abs(numpy.mean(mean) - target_avg) < mean_rtol, 'bad mean? %f %f'%(numpy.mean(mean), target_avg) diff = numpy.mean(abs(mean - target_avg))
print prefix, 'mean diff with mean', diff
assert diff < mean_rtol, 'bad mean? %f %f' % (mean, target_avg)
else: # if target_avg is a scalar, then we can do the mean of `mean` to get something more precise
mean = numpy.mean(mean)
print prefix, 'mean', mean
assert abs(mean - target_avg) < mean_rtol, 'bad mean? %f %f'%(numpy.mean(mean), target_avg)
print prefix, 'std', avg_std print prefix, 'std', avg_std
if target_std is not None: if target_std is not None:
assert abs(avg_std - target_std) < .01, 'bad std? %f %f'%(avg_std, target_std) assert abs(avg_std - target_std) < .01, 'bad std? %f %f'%(avg_std, target_std)
...@@ -450,30 +458,32 @@ def test_binomial(): ...@@ -450,30 +458,32 @@ def test_binomial():
def test_normal0(): def test_normal0():
steps = 50 steps = 50
std = 2.
if mode in ['DEBUG_MODE','DebugMode','FAST_COMPILE']: if mode in ['DEBUG_MODE','DebugMode','FAST_COMPILE']:
sample_size = (25,30) sample_size = (25,30)
rtol=.02 default_rtol=.02
else: else:
sample_size = (999,50) sample_size = (999,50)
rtol=.01 default_rtol=.01
sample_size_odd = (sample_size[0],sample_size[1]-1) sample_size_odd = (sample_size[0],sample_size[1]-1)
x = tensor.matrix() x = tensor.matrix()
for size, const_size, var_input, input in [ for size, const_size, var_input, input, avg, rtol in [
(sample_size, sample_size, [], []), (sample_size, sample_size, [], [], -5., default_rtol),
(x.shape, sample_size, [x], [numpy.zeros(sample_size, dtype=config.floatX)]), (x.shape, sample_size, [x], [numpy.zeros(sample_size, dtype=config.floatX)], -5., default_rtol),
(sample_size_odd, sample_size_odd, [], []),#test odd value (sample_size_odd, sample_size_odd, [], [], -5., default_rtol),#test odd value
(x.shape, sample_size_odd, [x], [numpy.zeros(sample_size_odd, dtype=config.floatX)]),#test odd value (x.shape, sample_size_odd, [x], [numpy.zeros(sample_size_odd, dtype=config.floatX)], -5., default_rtol),#test odd value
(sample_size, sample_size, [], [], numpy.arange(numpy.prod(sample_size), dtype='float32').reshape(sample_size), 10.*std/numpy.sqrt(steps)),
]: ]:
print '' print ''
print 'ON CPU:' print 'ON CPU:'
R = MRG_RandomStreams(234, use_cuda=False) R = MRG_RandomStreams(234, use_cuda=False)
n = R.normal(size=size, avg=-5.0, std=2.0) n = R.normal(size=size, avg=avg, std=std)
f = theano.function(var_input, n, mode=mode) f = theano.function(var_input, n, mode=mode)
theano.printing.debugprint(f) theano.printing.debugprint(f)
out = f(*input) out = f(*input)
print 'random?[:10]\n', out[0,0:10] print 'random?[:10]\n', out[0,0:10]
basictest(f, steps, const_size, target_avg=-5.0, target_std=2.0, prefix='mrg ', allow_01=True, inputs=input, mean_rtol=rtol) basictest(f, steps, const_size, target_avg=avg, target_std=std, prefix='mrg ', allow_01=True, inputs=input, mean_rtol=rtol)
sys.stdout.flush() sys.stdout.flush()
...@@ -481,7 +491,7 @@ def test_normal0(): ...@@ -481,7 +491,7 @@ def test_normal0():
print '' print ''
print 'ON GPU:' print 'ON GPU:'
R = MRG_RandomStreams(234, use_cuda=True) R = MRG_RandomStreams(234, use_cuda=True)
n = R.normal(size=size, avg=-5.0, std=2.0, dtype='float32') n = R.normal(size=size, avg=avg, std=std, dtype='float32')
assert n.dtype == 'float32' #well, it's really that this test w GPU doesn't make sense otw assert n.dtype == 'float32' #well, it's really that this test w GPU doesn't make sense otw
f = theano.function(var_input, theano.Out( f = theano.function(var_input, theano.Out(
theano.sandbox.cuda.basic_ops.gpu_from_host(n), theano.sandbox.cuda.basic_ops.gpu_from_host(n),
...@@ -493,7 +503,7 @@ def test_normal0(): ...@@ -493,7 +503,7 @@ def test_normal0():
print 'random?[:10]\n', gpu_out[0,0:10] print 'random?[:10]\n', gpu_out[0,0:10]
print '----' print '----'
sys.stdout.flush() sys.stdout.flush()
basictest(f, steps, const_size, target_avg=-5.0, target_std=2.0, prefix='gpu mrg ', allow_01=True, inputs=input, mean_rtol=rtol) basictest(f, steps, const_size, target_avg=avg, target_std=std, prefix='gpu mrg ', allow_01=True, inputs=input, mean_rtol=rtol)
# Need to allow some rounding error as their is float # Need to allow some rounding error as their is float
# computation that are done on the gpu vs cpu # computation that are done on the gpu vs cpu
assert numpy.allclose(out, gpu_out, rtol=5e-6, atol=5e-6) assert numpy.allclose(out, gpu_out, rtol=5e-6, atol=5e-6)
...@@ -503,10 +513,10 @@ def test_normal0(): ...@@ -503,10 +513,10 @@ def test_normal0():
print 'ON CPU w NUMPY:' print 'ON CPU w NUMPY:'
RR = theano.tensor.shared_randomstreams.RandomStreams(234) RR = theano.tensor.shared_randomstreams.RandomStreams(234)
nn = RR.normal(size=size, avg=-5.0, std=2.0) nn = RR.normal(size=size, avg=avg, std=std)
ff = theano.function(var_input, nn) ff = theano.function(var_input, nn)
basictest(ff, steps, const_size, target_avg=-5.0, target_std=2.0, prefix='numpy ', allow_01=True, inputs=input, mean_rtol=rtol) basictest(ff, steps, const_size, target_avg=avg, target_std=std, prefix='numpy ', allow_01=True, inputs=input, mean_rtol=rtol)
def basic_multinomialtest(f, steps, sample_size, target_pvals, prefix="", mean_rtol=0.04): def basic_multinomialtest(f, steps, sample_size, target_pvals, prefix="", mean_rtol=0.04):
......
...@@ -4676,7 +4676,8 @@ outer = Outer() ...@@ -4676,7 +4676,8 @@ outer = Outer()
# Gradient # Gradient
######################### #########################
def grad(cost, wrt, g_cost=None, consider_constant=[], warn_type=False): def grad(cost, wrt, g_cost=None, consider_constant=[], warn_type=False,
assume_continuously_differentiable = False):
""" """
:type cost: Scalar (0-dimensional) `Variable` :type cost: Scalar (0-dimensional) `Variable`
:type wrt: `Variable` or list of `Variable`s. :type wrt: `Variable` or list of `Variable`s.
...@@ -4688,6 +4689,14 @@ def grad(cost, wrt, g_cost=None, consider_constant=[], warn_type=False): ...@@ -4688,6 +4689,14 @@ def grad(cost, wrt, g_cost=None, consider_constant=[], warn_type=False):
:param warn_type: a value of True will cause warnings to be logged for any Op that emits a :param warn_type: a value of True will cause warnings to be logged for any Op that emits a
gradient that does not match its input type. gradient that does not match its input type.
:param assume_continuously_differentiable : flag that says if grad is strict about what it returns.
If set to false it will raise an exception for any argument in
``wrt`` for which there is no gradient either because some op does
not know how to compute the gradient with respect to that argument
or the argument is not part of the computational graph. If the flag
is set to true, the ``grad`` method returns zeros like the argument
( i.e. it makes the assumption that the gradient should be 0).
:rtype: `Variable` or list of `Variable`s (depending upon `wrt`) :rtype: `Variable` or list of `Variable`s (depending upon `wrt`)
:return: symbolic expression of gradient of `cost` with respect to `wrt`. :return: symbolic expression of gradient of `cost` with respect to `wrt`.
...@@ -4729,12 +4738,13 @@ def grad(cost, wrt, g_cost=None, consider_constant=[], warn_type=False): ...@@ -4729,12 +4738,13 @@ def grad(cost, wrt, g_cost=None, consider_constant=[], warn_type=False):
wrt = [wrt] wrt = [wrt]
ret = [] ret = []
for p in wrt: for p in wrt:
if p not in gmap: if p not in gmap and not assume_continuously_differentiable:
raise ValueError(("grad method was asked to compute the graident " raise ValueError(("grad method was asked to compute the graident "
"with respect to a variable that is not part of " "with respect to a variable that is not part of "
"the computational graph of the cost"),p) "the computational graph of the cost or is used "
"by a non-differentiable operator "),p)
else: else:
ret.append(gmap[p]) ret.append(gmap.get(p, zeros_like(p)))
if len(ret) == 1: if len(ret) == 1:
return ret[0] return ret[0]
...@@ -5008,7 +5018,8 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None, rel_tol=No ...@@ -5008,7 +5018,8 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None, rel_tol=No
if cast_to_output_type: if cast_to_output_type:
g_cost = cast(g_cost, o_output.dtype) g_cost = cast(g_cost, o_output.dtype)
symbolic_grad = grad(cost, tensor_pt, g_cost) symbolic_grad = grad(cost, tensor_pt, g_cost,
assume_continuously_differentiable = True)
#if o_output.dtype in ['float32','float64']: #if o_output.dtype in ['float32','float64']:
# assert all([x.dtype == o_output.dtype for x in symbolic_grad]),("Expected grad of type %s, got %s "%( symbolic_grad.dtype, o_output.dtyp)) # assert all([x.dtype == o_output.dtype for x in symbolic_grad]),("Expected grad of type %s, got %s "%( symbolic_grad.dtype, o_output.dtyp))
......
...@@ -3234,7 +3234,8 @@ class test_grad(unittest.TestCase): ...@@ -3234,7 +3234,8 @@ class test_grad(unittest.TestCase):
"""grad: Test returning a single zero value from grad""" """grad: Test returning a single zero value from grad"""
o = test_grad.O() o = test_grad.O()
a1 = o.make_node() a1 = o.make_node()
g = grad(a1.outputs[0], a1.outputs[1]) g = grad(a1.outputs[0], a1.outputs[1],
assume_continuously_differentiable = True)
self.assertTrue(g.owner.op == fill) self.assertTrue(g.owner.op == fill)
self.assertTrue(g.owner.inputs[1].data == 0) self.assertTrue(g.owner.inputs[1].data == 0)
try: try:
...@@ -3247,7 +3248,8 @@ class test_grad(unittest.TestCase): ...@@ -3247,7 +3248,8 @@ class test_grad(unittest.TestCase):
"""grad: Test returning some zero value from grad""" """grad: Test returning some zero value from grad"""
o = test_grad.O() o = test_grad.O()
a1 = o.make_node() a1 = o.make_node()
g0,g1,g2 = grad(a1.outputs[0], a1.inputs + [scalar('z')]) g0,g1,g2 = grad(a1.outputs[0], a1.inputs + [scalar('z')],
assume_continuously_differentiable = True)
self.assertTrue(o.gval0 is g0) self.assertTrue(o.gval0 is g0)
self.assertTrue(o.gval1 is g1) self.assertTrue(o.gval1 is g1)
self.assertTrue(g2.owner.op == fill) self.assertTrue(g2.owner.op == fill)
...@@ -3256,7 +3258,8 @@ class test_grad(unittest.TestCase): ...@@ -3256,7 +3258,8 @@ class test_grad(unittest.TestCase):
def test_zero_gradient_shape(self): def test_zero_gradient_shape(self):
"""Ensure that a zero gradient has the proper shape.""" """Ensure that a zero gradient has the proper shape."""
x = dmatrix() x = dmatrix()
f = theano.function([x], grad(dscalar(), x)) f = theano.function([x], grad(dscalar(), x,
assume_continuously_differentiable= True))
a = numpy.ones((3, 7)) a = numpy.ones((3, 7))
self.assertTrue((f(a) == 0).all()) # Zero gradient. self.assertTrue((f(a) == 0).all()) # Zero gradient.
self.assertTrue(a.shape == f(a).shape) # With proper shape. self.assertTrue(a.shape == f(a).shape) # With proper shape.
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论