Commit e4c6880e authored by Razvan Pascanu

merge; no conflicts

......@@ -50,7 +50,20 @@ class HostFromGpu(Op):
z[0] = numpy.asarray(x)
def grad(self, inputs, grads):
    """Gradient of HostFromGpu: move the output gradient onto the GPU.

    The input of this op lives on the GPU, so its gradient must be a
    cuda variable.  A host (TensorType) gradient is transferred with
    gpu_from_host; a gradient that is already cuda (e.g. supplied
    directly through Lop) is passed through unchanged.
    """
    gz, = grads
    # Fix: inspect the variable's *type*.  ``gz`` is a Variable, and a
    # Variable is never an instance of a Type class, so the previous
    # check ``isinstance(gz, tensor.TensorType)`` was always False and
    # the host->GPU transfer never happened.  (A stale pre-merge
    # ``return [gpu_from_host(gz)]`` line is also removed here.)
    if isinstance(gz.type, tensor.TensorType):
        # This would only happen if you call Lop and provide a tensor
        # that is not cuda.
        return [gpu_from_host(gz)]
    else:
        return [gz]
def R_op(self, inputs, eval_points):
    """R-operator of HostFromGpu.

    The op is a pure transfer, so the R-op is the transfer applied to
    the eval point.  The result must match this op's *output* type,
    which is a host TensorType.
    """
    ev, = eval_points
    # Fix 1: check ``ev.type`` — a Variable is never an instance of a
    # Type class, so the old check was always False.
    # Fix 2: direction — the original returned gpu_from_host(ev) for a
    # host eval point, producing a cuda variable where a host tensor
    # (this op's output type) is required.
    if isinstance(ev.type, tensor.TensorType):
        # Already on the host: matches the output type as-is.
        return [ev]
    else:
        # Cuda eval point: transfer it to the host.
        return [host_from_gpu(ev)]
def infer_shape(self, node, xshp):
    # Pure GPU->host transfer: the output shape equals the input shape.
    return xshp
host_from_gpu = HostFromGpu()  # shared module-level instance used to build graphs
......@@ -72,7 +85,21 @@ class GpuFromHost(Op):
z[0] = type_support_filter(theano._asarray(x, dtype='float32'), tuple([0]*x.ndim), 0, z[0])
def grad(self, inputs, grads):
    """Gradient of GpuFromHost: move the output gradient back to the host.

    The input of this op is a host tensor, so its gradient must also
    live on the host.  A cuda gradient is transferred with
    host_from_gpu; a gradient that is already a host tensor (e.g.
    supplied directly through Lop) is passed through unchanged.
    """
    gz, = grads
    # Fix: inspect the variable's *type*.  ``gz`` is a Variable, and a
    # Variable is never an instance of a Type class, so the previous
    # check ``isinstance(gz, CudaNdarrayType)`` was always False and
    # the GPU->host transfer never happened.  (A stale pre-merge
    # ``return [host_from_gpu(gz)]`` line is also removed here.)
    if isinstance(gz.type, CudaNdarrayType):
        # This would only happen if you call Lop and provide a variable
        # that is cuda.
        return [host_from_gpu(gz)]
    else:
        return [gz]
def R_op(self, inputs, eval_points):
    """R-operator of GpuFromHost.

    The op is a pure transfer, so the R-op is the transfer applied to
    the eval point.  The result must match this op's *output* type,
    which is a CudaNdarrayType.
    """
    ev, = eval_points
    # Fix 1: check ``ev.type`` — a Variable is never an instance of a
    # Type class, so the old check was always False.
    # Fix 2: direction — the original returned host_from_gpu(ev) for a
    # cuda eval point, producing a host tensor where a cuda variable
    # (this op's output type) is required.
    if isinstance(ev.type, CudaNdarrayType):
        # Already cuda: matches the output type as-is.
        return [ev]
    else:
        # Host eval point: transfer it onto the GPU.
        return [gpu_from_host(ev)]
def infer_shape(self, node, xshp):
    # Pure host->GPU transfer: the output shape equals the input shape.
    return xshp
gpu_from_host = GpuFromHost()  # shared module-level instance used to build graphs
......
......@@ -24,5 +24,6 @@ from sharedvar import tensor_constructor as shared
import nnet # used for softmax, sigmoid, etc.
from tensor_grad import Rop, Lop, grad, numeric_grad, verify_grad
Diff is collapsed.
......@@ -218,6 +218,13 @@ class DimShuffle(Op):
rval.insert(augm, 1)
return [rval]
def R_op(self, inputs, eval_points):
    """R-operator of DimShuffle: apply the same shuffle to the eval points.

    DimShuffle is linear, so its R-op is the op itself evaluated at the
    eval points.  If any eval point is missing (None), the whole result
    is undefined.
    """
    for point in eval_points:
        if point is None:
            return [None]
    return self.make_node(*eval_points).outputs
def c_code(self, node, name, inp, out, sub):
input, = inp
res, = out
......@@ -534,7 +541,78 @@ class Elemwise(Op):
else:
return self.name
def R_op(self, inputs, eval_points):
    """R-operator of an Elemwise op.

    For each output, accumulates sum_j d(out)/d(in_j) * eval_point_j,
    reusing the broadcast-aware gradients from self._bgrad.  An entry of
    the result stays None when no contributing term exists.
    """
    outs = self.make_node(*inputs).outputs
    rval = [None for x in outs]
    # For each output
    for idx, out in enumerate(outs):
        # make such that _bgrad computes only the gradients of the
        # current output on the inputs (and not all outputs)
        ograds = [theano.tensor.zeros_like(x) for x in outs]
        ograds[idx] = theano.tensor.ones_like(out)
        bgrads = self._bgrad(inputs, ograds)
        rop_out = None
        for jdx, (inp, eval_point) in enumerate(zip(inputs,
                                                    eval_points)):
            # if None, then we can just ignore this branch ..
            # what we do is to assume that for any non-differentiable
            # branch, the gradient is actually 0, which I think is not
            # the right thing to do .. have to talk to Ian and James
            # about it
            if bgrads[jdx] is None:
                pass
            elif eval_point is not None:
                if rop_out is None:
                    rop_out = bgrads[jdx] * eval_point
                else:
                    rop_out = rop_out + bgrads[jdx] * eval_point
        rval[idx] = rop_out
    return rval
def grad(self, inputs, ograds):
    """Gradient of an Elemwise op.

    Computes the gradient w.r.t. the broadcasted inputs with _bgrad,
    then sums each gradient over the dimensions that were broadcast so
    it matches the corresponding input's shape again.
    """
    # compute grad with respect to broadcasted input
    rval = self._bgrad(inputs, ograds)
    # sum out the broadcasted dimensions
    for i, ipt in enumerate(inputs):
        if rval[i] is None:
            # undefined gradient for this input: leave it as None
            continue
        # list of all the dimensions that are broadcastable for input[i]
        # so we can sum over them
        # todo: only count dimensions that were effectively broadcasted
        to_sum = [j for j, bcast in enumerate(ipt.type.broadcastable) if bcast]
        if to_sum:
            # Build the dimshuffle pattern that re-inserts the summed
            # (broadcast) dimensions as size-1 'x' axes; j counts only
            # the kept (non-broadcast) dimensions of the summed result.
            shuffle = []
            j = 0
            for bcast in ipt.type.broadcastable:
                if bcast == 1:
                    shuffle.append('x')
                else:
                    shuffle.append(j)
                    j += 1
            sr = Sum(axis=to_sum)(rval[i])
            sr = sr.dimshuffle(shuffle)
            rval[i] = sr
    return rval
def _bgrad(self, inputs, ograds):
# returns grad, with respect to broadcasted versions of inputs
# Gradients (especially on the final costs) don't have to be symbolic
# e.g., ograds will be [ 1. ] if your objective is c and the output
# of the current apply node is c
......@@ -558,35 +636,17 @@ class Elemwise(Op):
broadcastable = ()),
numpy.asarray(r.data)) # .reshape(b)
return DimShuffle((), ['x']*nd, inplace = True)(res)
new_r = Elemwise(node.op, {})(*[transform(input) for input in node.inputs])
new_r = Elemwise(node.op, {})(*[transform(ipt) for ipt in node.inputs])
return new_r
ret = []
for scalar_igrad, input in zip(scalar_igrads, inputs):
for scalar_igrad, ipt in zip(scalar_igrads, inputs):
if scalar_igrad is None:
# undefined gradient
ret.append(None)
continue
r = transform(scalar_igrad)
ret.append( transform(scalar_igrad))
# list of all the dimensions that are broadcastable for that input so we
# can sum over them
# todo: only count dimensions that were effectively broadcasted
to_sum = [i for i, bcast in enumerate(input.type.broadcastable) if bcast]
if to_sum:
shuffle = []
j = 0
for bcast in input.type.broadcastable:
if bcast == 1:
shuffle.append('x')
else:
shuffle.append(j)
j += 1
sr = Sum(axis = to_sum)(r)
sr = DimShuffle(sr.type.broadcastable, shuffle)(sr)
ret.append(sr)
else:
ret.append(r)
return ret
def perform(self, node, inputs, output_storage):
......@@ -1180,6 +1240,11 @@ class Sum(CAReduce):
i += 1
return Elemwise(scalar.second)(x, DimShuffle(gz.type.broadcastable, new_dims)(gz)),
def R_op(self, inputs, eval_points):
    """R-operator of Sum: Sum is linear, so apply it to the eval points."""
    # Any missing (None) eval point makes the result undefined.
    if any(point is None for point in eval_points):
        return [None]
    return self.make_node(*eval_points).outputs
def __str__(self):
if self.axis is None:
return "Sum"
......
......@@ -347,6 +347,13 @@ class Softmax(gof.Op):
sm = softmax(x)
return [softmax_grad(g_sm, sm)]
def R_op(self, inputs, eval_points):
    """R-operator of Softmax.

    Original author's note: the Jacobian is (believed to be) symmetric,
    so the R-op coincides with the gradient computation.
    """
    if any(point is None for point in eval_points):
        return [None]
    return self.grad(inputs, eval_points)
def infer_shape(self, node, shape):
    # Softmax preserves the shape of its input.
    return shape
......
......@@ -469,6 +469,11 @@ class MakeVector(T.Op):
grads.append(output_gradients[0][i])
return grads
def R_op(self, inputs, eval_points):
    """R-operator of MakeVector: stack the eval points into a vector."""
    for point in eval_points:
        if point is None:
            # One undefined direction makes the whole result undefined.
            return [None]
    return self.make_node(*eval_points).outputs
make_vector = MakeVector()  # shared module-level instance used to build graphs
class MakeVectorPrinter:
......
Diff is collapsed.
......@@ -2,6 +2,48 @@
WRITE ME
Tests for the R operator / L operator
Ops that do not yet support the R operator:
PermuteRowElements
Tile
AdvancedSubtensor
TensorDot
Outer
Prod
MulwithoutZeros
ProdWithoutZeros
list of ops that support R-op:
* Alloc
* Split
* ARange
* ScalarFromTensor
* Shape
* SpecifyShape
* MaxAndArgmax
* Subtensor
* IncSubtensor
* Rebroadcast
* Join
* Reshape
* Flatten
* AdvancedSubtensor1
* AdvancedIncSubtensor1
* AdvancedIncSubtensor
* Dot
* DimShuffle
* Elemwise
* Sum
* Softmax
* Scan
"""
import unittest
......@@ -10,52 +52,208 @@ from theano import function
import theano
import theano.tensor as TT
import numpy
from theano.gof import Op, Apply
class test_rop(unittest.TestCase):
'''
Special Op created to test what happens when you have one op that is not
differentiable in the computational graph
'''
class BreakRop(Op):
    """Identity op whose grad and R_op are deliberately undefined.

    @note: Non-differentiable.  The tests splice this op into a graph
    to check that TT.Rop raises for non-differentiable graphs.
    """
    def __hash__(self):
        # All instances are interchangeable, so hash on the class only.
        return hash(type(self))
    def __eq__(self, other):
        # Two BreakRop instances are equal iff they are the same class.
        return type(self) == type(other)
    def make_node(self, x):
        # Output has exactly the input's type.
        return Apply(self, [x], [x.type()])
    def perform(self, node, inp, out_):
        x, = inp
        out, = out_
        # Identity: pass the value through unchanged.
        out[0] = x
    def grad(self, inp, grads):
        # Gradient explicitly undefined.
        return [None]
    def R_op(self, inputs, eval_points):
        # R-op explicitly undefined.
        return [None]
def test_specifyshape(self):
rng = numpy.random.RandomState(utt.fetch_seed())
vx = numpy.asarray(rng.uniform(size=(5,)), theano.config.floatX)
vv = numpy.asarray(rng.uniform(size=(5,)), theano.config.floatX)
x = TT.vector('x')
v = TT.vector('v')
y = TT.specify_shape(x, (5,))
yv = TT.Rop(y,x,v)
rop_f = function([x,v], yv)
J, _ = theano.scan( lambda i,y,x: TT.grad(y[i],x),
sequences = TT.arange(x.shape[0]),
non_sequences = [y,x])
sy = TT.dot(J, v)
break_op = BreakRop()  # shared instance spliced into graphs by the check_* helpers
class test_RopLop(unittest.TestCase):
def setUp(self):
    # Using vectors make things a lot simpler for generating the same
    # computations using scan
    self.x = TT.vector('x')
    self.v = TT.vector('v')
    self.rng = numpy.random.RandomState(utt.fetch_seed())
    # Reproducible random sizes: vector length in [5, 34].
    self.in_shape = (5 + self.rng.randint(30),)
    self.mx = TT.matrix('mx')
    self.mv = TT.matrix('mv')
    self.mat_in_shape = (5 + self.rng.randint(30),
                         5 + self.rng.randint(30))
def check_nondiff_rop(self, y):
    """Assert that TT.Rop(y, self.x, self.v) raises ValueError.

    Used on graphs that contain a non-differentiable op: Rop is
    expected to refuse them rather than return something.
    """
    try:
        TT.Rop(y, self.x, self.v)
    except ValueError:
        return
    self.fail('Op did not raised an error even though the function'
              ' is not differentiable')
def check_mat_rop_lop(self, y, out_shape):
    """Check Rop and Lop of ``y`` (a function of self.mx) against
    scan-based reference computations, and check that Rop raises once
    the graph is made non-differentiable via ``break_op``.

    NOTE(review): the diff interleaved pre-image lines (a stale
    ``scan_f = function([x, v], sy)`` using undefined locals, and the
    removed ``test_lop`` class) with the post-image; this body is the
    reconstructed post-image.
    """
    vx = numpy.asarray(self.rng.uniform(size=self.mat_in_shape),
                       theano.config.floatX)
    vv = numpy.asarray(self.rng.uniform(size=self.mat_in_shape),
                       theano.config.floatX)
    # --- Rop: compare against sum_j dy_i/dx_j * v_j computed by scan.
    yv = TT.Rop(y, self.mx, self.mv)
    rop_f = function([self.mx, self.mv], yv)
    sy, _ = theano.scan(lambda i, y, x, v: (TT.grad(y[i], x) * v).sum(),
                        sequences=TT.arange(y.shape[0]),
                        non_sequences=[y, self.mx, self.mv])
    scan_f = function([self.mx, self.mv], sy)
    v1 = rop_f(vx, vv)
    v2 = scan_f(vx, vv)
    assert numpy.allclose(v1, v2)
    self.check_nondiff_rop(theano.clone(
        y, replace={self.mx: break_op(self.mx)}))
    # --- Lop: compare against the scan-based reference.
    vv = numpy.asarray(self.rng.uniform(size=out_shape),
                       theano.config.floatX)
    yv = TT.Lop(y, self.mx, self.v)
    lop_f = function([self.mx, self.v], yv)
    sy, _ = theano.scan(lambda i, y, x, v: (TT.grad(y[i] * v[i], x))[i],
                        sequences=TT.arange(y.shape[0]),
                        non_sequences=[y, self.mx, self.v])
    scan_f = function([self.mx, self.v], sy)
    v1 = lop_f(vx, vv)
    v2 = scan_f(vx, vv)
    assert numpy.allclose(v1, v2)
def check_rop_lop(self, y, out_shape):
    """Check Rop and Lop of ``y`` (a function of self.x) against the
    Jacobian built row-by-row with scan.

    NOTE(review): the diff interleaved pre-image lines (scanning over
    the undefined locals ``x``/``v`` and a duplicate ``scan_f``
    assignment) with the post-image; this body is the reconstructed
    post-image.
    """
    # TEST ROP
    vx = numpy.asarray(self.rng.uniform(size=self.in_shape),
                       theano.config.floatX)
    vv = numpy.asarray(self.rng.uniform(size=self.in_shape),
                       theano.config.floatX)
    yv = TT.Rop(y, self.x, self.v)
    rop_f = function([self.x, self.v], yv)
    J, _ = theano.scan(lambda i, y, x: TT.grad(y[i], x),
                       sequences=TT.arange(y.shape[0]),
                       non_sequences=[y, self.x])
    sy = TT.dot(J, self.v)
    scan_f = function([self.x, self.v], sy)
    v1 = rop_f(vx, vv)
    v2 = scan_f(vx, vv)
    assert numpy.allclose(v1, v2)
    self.check_nondiff_rop(theano.clone(
        y, replace={self.x: break_op(self.x)}))
    # TEST LOP
    vx = numpy.asarray(self.rng.uniform(size=self.in_shape),
                       theano.config.floatX)
    vv = numpy.asarray(self.rng.uniform(size=out_shape),
                       theano.config.floatX)
    yv = TT.Lop(y, self.x, self.v)
    lop_f = function([self.x, self.v], yv)
    J, _ = theano.scan(lambda i, y, x: TT.grad(y[i], x),
                       sequences=TT.arange(y.shape[0]),
                       non_sequences=[y, self.x])
    sy = TT.dot(self.v, J)
    scan_f = function([self.x, self.v], sy)
    v1 = lop_f(vx, vv)
    v2 = scan_f(vx, vv)
    assert numpy.allclose(v1, v2)
def test_shape(self):
    # shape is not differentiable: Rop must raise.
    self.check_nondiff_rop(self.x.shape[0])
def test_specifyshape(self):
    # SpecifyShape supports the R-op; compare against the scan reference.
    self.check_rop_lop(TT.specify_shape(self.x, self.in_shape),
                       self.in_shape)
def test_max_argmax(self):
    """Max supports the R-op; argmax does not and must raise.

    Fixes two defects from the merge: the first definition called the
    nonexistent ``check_map_rop_lop`` (typo for ``check_mat_rop_lop``),
    and two methods shared the name ``test_max_argmax`` so the first
    was shadowed and never ran.  Merged into a single method.
    """
    self.check_mat_rop_lop(TT.max(self.mx, axis=1),
                           (self.mat_in_shape[0],))
    self.check_nondiff_rop(TT.argmax(self.mx, axis=1))
def test_subtensor(self):
    # Slicing keeps the first 4 elements, so the output shape is (4,).
    self.check_rop_lop(self.x[:4], (4,))
def test_incsubtensor1(self):
    """Increment a slice of the differentiable input by a shared vector."""
    tv = numpy.asarray(self.rng.uniform(size=(3,)),
                       theano.config.floatX)
    t = theano.shared(tv)
    out = TT.inc_subtensor(self.x[:3], t)
    self.check_rop_lop(out, self.in_shape)

def test_incsubtensor2(self):
    """Increment a slice of a shared vector by the differentiable input.

    Renamed from a second ``test_incsubtensor1``: the duplicate name
    shadowed the first definition, so only one of the two cases ever
    ran under unittest.
    """
    tv = numpy.asarray(self.rng.uniform(size=(10,)),
                       theano.config.floatX)
    t = theano.shared(tv)
    out = TT.inc_subtensor(t[:4], self.x[:4])
    self.check_rop_lop(out, (10,))
def test_setsubtensor1(self):
    """Set a slice of the differentiable input from a shared vector."""
    tv = numpy.asarray(self.rng.uniform(size=(3,)),
                       theano.config.floatX)
    t = theano.shared(tv)
    out = TT.set_subtensor(self.x[:3], t)
    self.check_rop_lop(out, self.in_shape)

def test_setsubtensor2(self):
    """Set a slice of a shared vector from the differentiable input.

    Renamed from a second ``test_setsubtensor1``: the duplicate name
    shadowed the first definition, so only one of the two cases ever
    ran under unittest.
    """
    tv = numpy.asarray(self.rng.uniform(size=(10,)),
                       theano.config.floatX)
    t = theano.shared(tv)
    out = TT.set_subtensor(t[:4], self.x[:4])
    self.check_rop_lop(out, (10,))
def test_join(self):
    # Join the input vector with a length-10 shared vector along axis 0.
    tv = numpy.asarray(self.rng.uniform(size=(10,)),
                       theano.config.floatX)
    t = theano.shared(tv)
    out = TT.join(0, self.x, t)
    self.check_rop_lop(out, (self.in_shape[0] + 10,))
def test_dot(self):
    # x . W with a square shared weight matrix, so the shape is preserved.
    insh = self.in_shape[0]
    vW = numpy.asarray(self.rng.uniform(size=(insh, insh)),
                       theano.config.floatX)
    W = theano.shared(vW)
    self.check_rop_lop(TT.dot(self.x, W), self.in_shape)
def test_elemwise0(self):
    # Simple differentiable elemwise expression.
    self.check_rop_lop((self.x + 1) ** 2, self.in_shape)
def test_elemwise1(self):
    # Elemwise with a mixed-dtype operand: the input plus its int32 cast.
    self.check_rop_lop(self.x + TT.cast(self.x, 'int32'),
                       self.in_shape)
def test_sum(self):
    # Row sums of the matrix input: output is a vector of length nrows.
    self.check_mat_rop_lop(self.mx.sum(axis=1), (self.mat_in_shape[0],))
def test_softmax(self):
    # Softmax adds an extra dimension, hence the [0] indexing.
    self.check_rop_lop(TT.nnet.softmax(self.x)[0], self.in_shape)
Markdown formatting is supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment