提交 3a3a18f8 作者: fsavard

Corrected bug in GpuJoin and reactivated the optimization.

上级 5c826bdc
...@@ -1480,7 +1480,8 @@ class GpuJoin(tensor.Join): ...@@ -1480,7 +1480,8 @@ class GpuJoin(tensor.Join):
# except for 'axis' # except for 'axis'
def construct_slices(curlen): def construct_slices(curlen):
slices = [slice(None,None,None) for i in range(len(cndas))] slices = [slice(None,None,None) for i in \
range(len(template_shape))]
slices[axis] = slice(curpos,curpos+curlen,None) slices[axis] = slice(curpos,curpos+curlen,None)
return tuple(slices) return tuple(slices)
......
...@@ -499,7 +499,7 @@ def local_gpu_join(node): ...@@ -499,7 +499,7 @@ def local_gpu_join(node):
not implemented yet. not implemented yet.
""" """
if False and isinstance(node.op, tensor.Join): if isinstance(node.op, tensor.Join):
# optimizing this case: # optimizing this case:
# join(host_from_gpu) -> host_from_gpu(gpu_join) # join(host_from_gpu) -> host_from_gpu(gpu_join)
......
...@@ -72,9 +72,7 @@ def test_softmax_with_bias(): ...@@ -72,9 +72,7 @@ def test_softmax_with_bias():
bv=numpy.random.rand(8) bv=numpy.random.rand(8)
assert numpy.allclose(f(xv,bv),f2(xv,bv)) assert numpy.allclose(f(xv,bv),f2(xv,bv))
def test_opt_gpujoin_onlyajoin():
def test_opt_gpujoin_joinvectors_elemwise_than_minusone():
# from a bug in normal sampling # from a bug in normal sampling
_a = numpy.asarray([[1,2],[3,4]],dtype='float32') _a = numpy.asarray([[1,2],[3,4]],dtype='float32')
_b = numpy.asarray([[5,6,7],[8,9,10]],dtype='float32') _b = numpy.asarray([[5,6,7],[8,9,10]],dtype='float32')
...@@ -87,6 +85,8 @@ def test_opt_gpujoin_joinvectors_elemwise_than_minusone(): ...@@ -87,6 +85,8 @@ def test_opt_gpujoin_joinvectors_elemwise_than_minusone():
#theano.printing.debugprint(f) #theano.printing.debugprint(f)
f()
graph_nodes = f.maker.env.toposort() graph_nodes = f.maker.env.toposort()
assert isinstance(graph_nodes[-1].op, cuda.HostFromGpu) assert isinstance(graph_nodes[-1].op, cuda.HostFromGpu)
...@@ -96,6 +96,38 @@ def test_opt_gpujoin_joinvectors_elemwise_than_minusone(): ...@@ -96,6 +96,38 @@ def test_opt_gpujoin_joinvectors_elemwise_than_minusone():
def test_opt_gpujoin_joinvectors_elemwise_then_minusone():
    """Join two elemwise results along axis 0, drop the last item, check GPU ops.

    Regression test taken from a bug in GPU normal sampling.  It builds
    ``join(0, cos(a), sin(b))[:-1]`` from two float32 shared vectors and
    verifies two things:

    * the last three nodes of the compiled graph are, in order, ``GpuJoin``,
      ``GpuSubtensor`` and ``HostFromGpu``;
    * the computed values match the same expression evaluated with NumPy.
    """
    # from a bug in gpu normal sampling
    _a = numpy.asarray([1, 2, 3, 4], dtype='float32')
    _b = numpy.asarray([5, 6, 7, 8], dtype='float32')
    a = theano.shared(_a)
    b = theano.shared(_b)

    a_prime = tensor.cos(a)
    b_prime = tensor.sin(b)

    c = tensor.join(0, a_prime, b_prime)
    d = c[:-1]
    f = theano.function([], d)

    # Uncomment to inspect the optimized graph when the assertions below fail.
    #theano.printing.debugprint(f)

    graph_nodes = f.maker.env.toposort()
    assert isinstance(graph_nodes[-1].op, cuda.HostFromGpu)
    assert isinstance(graph_nodes[-2].op, cuda.GpuSubtensor)
    assert isinstance(graph_nodes[-3].op, cuda.GpuJoin)

    # The inputs are 1-D, so the only valid axis is 0 -- the same axis that is
    # passed to tensor.join above.  axis=1 is out of bounds for 1-D arrays.
    concat = numpy.concatenate([numpy.cos(_a), numpy.sin(_b)], axis=0)
    concat = concat[:-1]

    assert numpy.allclose(numpy.asarray(f()), concat)
if __name__ == '__main__': if __name__ == '__main__':
test_opt_gpujoin_onlyajoin() test_opt_gpujoin_onlyajoin()
test_opt_gpujoin_joinvectors_elemwise_than_minusone() test_opt_gpujoin_joinvectors_elemwise_then_minusone()
...@@ -808,7 +808,6 @@ def test_normal0(): ...@@ -808,7 +808,6 @@ def test_normal0():
print prefix, 'samples/sec', steps*sample_size[0]*sample_size[1] / dt print prefix, 'samples/sec', steps*sample_size[0]*sample_size[1] / dt
sample_size = (999,100) sample_size = (999,100)
'''
print '' print ''
print 'ON CPU:' print 'ON CPU:'
...@@ -820,11 +819,9 @@ def test_normal0(): ...@@ -820,11 +819,9 @@ def test_normal0():
basictest(f, 50, -5.0, 2.0, prefix='mrg ') basictest(f, 50, -5.0, 2.0, prefix='mrg ')
sys.stdout.flush() sys.stdout.flush()
'''
# now with odd number of samples # now with odd number of samples
sample_size = (10,10) sample_size = (999,99)
#sample_size = (999,99)
print '' print ''
...@@ -844,7 +841,6 @@ def test_normal0(): ...@@ -844,7 +841,6 @@ def test_normal0():
basictest(f, 50, -5.0, 2.0, prefix='gpu mrg ') basictest(f, 50, -5.0, 2.0, prefix='gpu mrg ')
'''
print '' print ''
print 'ON CPU w NUMPY:' print 'ON CPU w NUMPY:'
RR = theano.tensor.shared_randomstreams.RandomStreams(234) RR = theano.tensor.shared_randomstreams.RandomStreams(234)
...@@ -853,7 +849,6 @@ def test_normal0(): ...@@ -853,7 +849,6 @@ def test_normal0():
ff = theano.function([], nn) ff = theano.function([], nn)
basictest(ff, 50, -5.0, 2.0, prefix='numpy ') basictest(ff, 50, -5.0, 2.0, prefix='numpy ')
'''
if __name__ == '__main__': if __name__ == '__main__':
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论