提交 3a3a18f8 authored 作者: fsavard's avatar fsavard

Corrected bug in GpuJoin and reactivated the optimization.

上级 5c826bdc
......@@ -1480,7 +1480,8 @@ class GpuJoin(tensor.Join):
# except for 'axis'
def construct_slices(curlen):
    """Build an indexing tuple selecting a span of length `curlen` along
    the join axis, starting at the current write position.

    Uses closure variables from the enclosing GpuJoin code: `template_shape`
    (shape of the joined output), `axis` (the join axis) and `curpos`
    (current offset along `axis`).

    Returns a tuple of `slice` objects, one per output dimension.
    """
    # Full slice on every dimension except the join axis.  Note: the
    # number of slices must follow the *output* rank (template_shape),
    # not the number of inputs — using len(cndas) here was the bug this
    # commit fixed.
    slices = [slice(None, None, None)
              for i in range(len(template_shape))]
    # Restrict the join axis to the current input's span.
    slices[axis] = slice(curpos, curpos + curlen, None)
    return tuple(slices)
......
......@@ -499,7 +499,7 @@ def local_gpu_join(node):
not implemented yet.
"""
if False and isinstance(node.op, tensor.Join):
if isinstance(node.op, tensor.Join):
# optimizing this case:
# join(host_from_gpu) -> host_from_gpu(gpu_join)
......
......@@ -72,9 +72,7 @@ def test_softmax_with_bias():
bv=numpy.random.rand(8)
assert numpy.allclose(f(xv,bv),f2(xv,bv))
def test_opt_gpujoin_onlyajoin():
    """Check that a plain tensor.join of two shared matrices is optimized
    into a GpuJoin (graph ends with a single HostFromGpu transfer)."""
    # from a bug in normal sampling
    _a = numpy.asarray([[1, 2], [3, 4]], dtype='float32')
    _b = numpy.asarray([[5, 6, 7], [8, 9, 10]], dtype='float32')
    # NOTE(review): this line was elided in the diff view but is implied
    # by the use of `a` in the join below — confirm against the full file.
    a = theano.shared(_a)
    b = theano.shared(_b)
    c = tensor.join(1, a, b)
    f = theano.function([], c)
    #theano.printing.debugprint(f)
    f()
    graph_nodes = f.maker.env.toposort()
    # The join ran on the GPU, so the last node transfers back to host.
    assert isinstance(graph_nodes[-1].op, cuda.HostFromGpu)
......@@ -96,6 +96,38 @@ def test_opt_gpujoin_joinvectors_elemwise_than_minusone():
def test_opt_gpujoin_joinvectors_elemwise_then_minusone():
    """Check that join -> subtensor on elemwise-transformed vectors is
    optimized to GpuJoin followed by GpuSubtensor, and that the numeric
    result matches a NumPy reference."""
    # from a bug in gpu normal sampling
    _a = numpy.asarray([1, 2, 3, 4], dtype='float32')
    _b = numpy.asarray([5, 6, 7, 8], dtype='float32')
    a = theano.shared(_a)
    b = theano.shared(_b)
    a_prime = tensor.cos(a)
    b_prime = tensor.sin(b)
    c = tensor.join(0, a_prime, b_prime)
    d = c[:-1]
    f = theano.function([], d)
    #theano.printing.debugprint(f)
    graph_nodes = f.maker.env.toposort()
    assert isinstance(graph_nodes[-1].op, cuda.HostFromGpu)
    assert isinstance(graph_nodes[-2].op, cuda.GpuSubtensor)
    assert isinstance(graph_nodes[-3].op, cuda.GpuJoin)
    # Reference value: the graph joins 1-D vectors along axis 0, so the
    # NumPy check must also concatenate on axis 0 (axis=1 is invalid for
    # 1-D inputs and would not match the computation under test).
    concat = numpy.concatenate([numpy.cos(_a), numpy.sin(_b)], axis=0)
    concat = concat[:-1]
    assert numpy.allclose(numpy.asarray(f()), concat)
if __name__ == '__main__':
    # Run the GpuJoin optimization tests directly.  The pre-rename
    # `..._than_minusone` entry no longer exists and would raise
    # NameError, so only the current test names are invoked.
    test_opt_gpujoin_onlyajoin()
    test_opt_gpujoin_joinvectors_elemwise_then_minusone()
......@@ -808,7 +808,6 @@ def test_normal0():
print prefix, 'samples/sec', steps*sample_size[0]*sample_size[1] / dt
sample_size = (999,100)
'''
print ''
print 'ON CPU:'
......@@ -820,11 +819,9 @@ def test_normal0():
basictest(f, 50, -5.0, 2.0, prefix='mrg ')
sys.stdout.flush()
'''
# now with odd number of samples
sample_size = (10,10)
#sample_size = (999,99)
sample_size = (999,99)
print ''
......@@ -844,7 +841,6 @@ def test_normal0():
basictest(f, 50, -5.0, 2.0, prefix='gpu mrg ')
'''
print ''
print 'ON CPU w NUMPY:'
RR = theano.tensor.shared_randomstreams.RandomStreams(234)
......@@ -853,7 +849,6 @@ def test_normal0():
ff = theano.function([], nn)
basictest(ff, 50, -5.0, 2.0, prefix='numpy ')
'''
if __name__ == '__main__':
......
Markdown 格式
0%
您已将 0 人添加到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论