Commit ff0abb5f authored by abergeron

Merge pull request #1726 from carriepl/master

Conversion of GpuSoftmax and GpuSoftmaxWithBias to the new backend
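
In the new gpuarray backend, the conversion works by registering local optimizers that lift the CPU ops (tensor.nnet.Softmax and tensor.nnet.SoftmaxWithBias) to their GPU counterparts (GpuSoftmax and GpuSoftmaxWithBias), as the first hunk below shows. The following is only a minimal sketch of that lifting pattern, under the assumption that register_opt and op_lifter are importable from theano.sandbox.gpuarray.opt (the module the hunk modifies) and that pygpu/libgpuarray is installed:

    # Sketch of the op-lifting pattern this PR relies on (mirrors the diff below).
    # Assumption: register_opt and op_lifter can be imported from
    # theano.sandbox.gpuarray.opt, the module the first hunk modifies.
    from theano import tensor
    from theano.sandbox.gpuarray.nnet import GpuSoftmax
    from theano.sandbox.gpuarray.opt import op_lifter, register_opt


    @register_opt()                      # add this rewrite to the gpuarray optimizer database
    @op_lifter([tensor.nnet.Softmax])    # fire on graph nodes whose op is the CPU Softmax
    def local_gpua_softmax(node):
        # Returning the GPU op asks the lifter to rebuild the node so it runs
        # on GpuArray variables instead of host ndarrays.
        return GpuSoftmax()
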
@@ -20,7 +20,9 @@ from theano.sandbox.gpuarray.basic_ops import (host_from_gpu,
 from theano.sandbox.gpuarray.blas import gpu_dot22, GpuGemv, GpuGemm
 from theano.sandbox.gpuarray.conv import GpuConv
 from theano.sandbox.gpuarray.nnet import (GpuCrossentropySoftmaxArgmax1HotWithBias,
-                                          GpuCrossentropySoftmax1HotWithBiasDx)
+                                          GpuCrossentropySoftmax1HotWithBiasDx,
+                                          GpuSoftmaxWithBias,
+                                          GpuSoftmax)
 from theano.sandbox.gpuarray.elemwise import (GpuElemwise, _is_scalar,
                                               GpuDimShuffle, GpuCAReduceCuda)
 from theano.sandbox.gpuarray.subtensor import GpuIncSubtensor, GpuSubtensor
@@ -340,7 +342,16 @@ def local_gpua_crossentropysoftmaxargmax1hotwithbias(node):
 @op_lifter([tensor.nnet.CrossentropySoftmax1HotWithBiasDx])
 def local_gpua_crossentropysoftmax1hotwithbiasdx(node):
     return GpuCrossentropySoftmax1HotWithBiasDx()
 
+@register_opt()
+@op_lifter([tensor.nnet.Softmax])
+def local_gpua_softmax(node):
+    return GpuSoftmax()
+
+@register_opt()
+@op_lifter([tensor.nnet.SoftmaxWithBias])
+def local_gpua_softmaxwithbias(node):
+    return GpuSoftmaxWithBias()
 
 @register_opt()
 @op_lifter([gpu_from_host, ConvOp])
@@ -157,3 +157,132 @@ def test_GpuCrossentropySoftmax1HotWithBiasDx():
         assert False, "numpy.allclose(cpu_out, gpu_out, rtol=%s, atol=%s)" % (
             rtol, atol)
 
+
+
+def test_softmax_with_bias_float32():
+    softmax_with_bias_unittest_template(dtypeInput='float32',
+                                        dtypeBias='float32')
+
+
+def test_softmax_with_bias_float64():
+    softmax_with_bias_unittest_template(dtypeInput='float32',
+                                        dtypeBias='float64')
+    softmax_with_bias_unittest_template(dtypeInput='float64',
+                                        dtypeBias='float32')
+    softmax_with_bias_unittest_template(dtypeInput='float64',
+                                        dtypeBias='float64')
+
+
+def softmax_with_bias_unittest_template(dtypeInput, dtypeBias):
+    """
+    This is a basic test for GpuSoftmaxWithBias with float32 and float64
+    variables.
+
+    We check that we loop when there are too many blocks.
+
+    TODO: check that we loop when there are too many threads. (THIS IS
+    NOT IMPLEMENTED)
+    """
+    assert dtypeInput in ['float32', 'float64']
+    assert dtypeBias in ['float32', 'float64']
+
+    if dtypeInput == 'float32':
+        x = T.fmatrix('x')
+    elif dtypeInput == 'float64':
+        x = T.dmatrix('x')
+
+    # We can't use zeros_like(x[0, ::]) as this doesn't allow testing with a
+    # 0 shape.
+    if dtypeBias == 'float32':
+        z = T.nnet.softmax_with_bias(x, T.arange(x.shape[1] * 2,
+                                                 dtype='float32')[::2])
+    elif dtypeBias == 'float64':
+        z = T.nnet.softmax_with_bias(x, T.arange(x.shape[1] * 2,
+                                                 dtype='float64')[::2])
+
+    f = theano.function([x], z, mode=mode_without_gpu)
+    f_gpu = theano.function([x], z, mode=mode_with_gpu)
+    assert f.maker.fgraph.toposort()[-1].op == T.nnet.softmax_with_bias
+    assert isinstance(f_gpu.maker.fgraph.toposort()[-2].op,
+                      theano.sandbox.gpuarray.nnet.GpuSoftmaxWithBias)
+
+    def cmp(n, m):
+        if dtypeInput == 'float32':
+            data = numpy.arange(n * m, dtype='float32').reshape(n, m)
+        elif dtypeInput == 'float64':
+            data = numpy.arange(n * m, dtype='float64').reshape(n, m)
+        out = f(data)
+        gout = f_gpu(data)
+        assert numpy.allclose(out, gout), numpy.absolute(out - gout)
+
+    cmp(2, 5)
+    # We need to test n > 32 * 1024 to check that we make the block loop.
+    cmp(2 << 15, 5)
+    cmp(4074, 400)
+    cmp(0, 10)
+    cmp(784, 784)
+    cmp(4, 1000)
+    cmp(4, 1024)
+    cmp(4, 2000)
+    cmp(4, 2024)
+    # The GTX285 doesn't have enough shared memory for this case.
+    cmp(4, 4074)
+    # The GTX580, 680 and Kepler don't have enough shared memory.
+    cmp(2, 10000)
+    cmp(128, 16 * 1024)
+    cmp(128, 64 * 1024)
+
+
+def test_softmax_float32():
+    softmax_unittest_template('float32')
+
+
+def test_softmax_float64():
+    softmax_unittest_template('float64')
+
+
+def softmax_unittest_template(dtypeInput):
+    """
+    This is a basic test for GpuSoftmax with float32 and float64 variables.
+
+    We check that we loop when there are too many blocks.
+    We use the slower code path when there isn't enough shared memory.
+    """
+    assert dtypeInput in ['float32', 'float64']
+
+    if dtypeInput == 'float32':
+        x = T.fmatrix('x')
+    elif dtypeInput == 'float64':
+        x = T.dmatrix('x')
+
+    z = T.nnet.softmax(x)
+    f = theano.function([x], z, mode=mode_without_gpu)
+    f_gpu = theano.function([x], z, mode=mode_with_gpu)
+    assert f.maker.fgraph.toposort()[-1].op == T.nnet.softmax
+    assert isinstance(f_gpu.maker.fgraph.toposort()[-2].op,
+                      theano.sandbox.gpuarray.nnet.GpuSoftmax)
+
+    def cmp(n, m):
+        if dtypeInput == 'float32':
+            data = numpy.arange(n * m, dtype='float32').reshape(n, m)
+        elif dtypeInput == 'float64':
+            data = numpy.arange(n * m, dtype='float64').reshape(n, m)
+        out = f(data)
+        gout = f_gpu(data)
+        assert numpy.allclose(out, gout), numpy.absolute(out - gout)
+
+    # We need to test n > 32 * 1024 to check that we make the block loop.
+    cmp(2, 5)
+    cmp(2 << 15, 5)
+    cmp(4074, 400)
+    cmp(0, 10)
+    cmp(784, 784)
+    cmp(4, 1000)
+    cmp(4, 1024)
+    cmp(4, 2000)
+    cmp(4, 2024)
+    # The GTX285 doesn't have enough shared memory.
+    cmp(4, 4074)
+    # The GTX580, 680 and Kepler don't have enough shared memory.
+    cmp(2, 10000)
+    cmp(128, 16 * 1024)
+    cmp(128, 64 * 1024)
\ No newline at end of file
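
Beyond the unit tests above, a quick manual sanity check is to compile a small softmax graph and look for the lifted op in the optimized graph. This is only a sketch under the assumption that pygpu/libgpuarray is installed and Theano is configured to use a gpuarray device; on a CPU-only setup the graph simply keeps the CPU Softmax:

    # Sanity-check sketch: does tensor.nnet.Softmax get lifted to GpuSoftmax?
    # Assumption: a gpuarray device is configured; otherwise the printed op
    # list will only contain the CPU Softmax op.
    import numpy
    import theano
    import theano.tensor as T

    x = T.fmatrix('x')
    f = theano.function([x], T.nnet.softmax(x))

    # Inspect the optimized graph for the GPU op introduced by this PR.
    print([type(node.op).__name__ for node in f.maker.fgraph.toposort()])

    data = numpy.arange(20, dtype='float32').reshape(4, 5)
    print(f(data).sum(axis=1))  # every row of a softmax output sums to 1
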