提交 ed4afc19 authored 作者: Benjamin Scellier's avatar Benjamin Scellier

file theano/gpuarray/tests/test_opt.py

上级 385f7230
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
import numpy import numpy as np
from nose.tools import assert_raises from nose.tools import assert_raises
import theano import theano
...@@ -79,13 +79,13 @@ def test_local_gpu_contiguous(): ...@@ -79,13 +79,13 @@ def test_local_gpu_contiguous():
def test_flatten(): def test_flatten():
m = theano.tensor.fmatrix() m = theano.tensor.fmatrix()
f = theano.function([m], m.flatten(), mode=mode_with_gpu) f = theano.function([m], m.flatten(), mode=mode_with_gpu)
val = numpy.random.rand(10, 11).astype("float32") val = np.random.rand(10, 11).astype("float32")
res = f(val) res = f(val)
utt.assert_allclose(res, val.flatten()) utt.assert_allclose(res, val.flatten())
assert res.shape == val.flatten().shape assert res.shape == val.flatten().shape
assert GpuReshape in [type(node.op) assert GpuReshape in [type(node.op)
for node in f.maker.fgraph.toposort()] for node in f.maker.fgraph.toposort()]
val = numpy.random.rand(10, 11).astype("float32") val = np.random.rand(10, 11).astype("float32")
res = f(val) res = f(val)
utt.assert_allclose(res, val.flatten()) utt.assert_allclose(res, val.flatten())
assert res.shape == val.flatten().shape assert res.shape == val.flatten().shape
...@@ -93,7 +93,7 @@ def test_flatten(): ...@@ -93,7 +93,7 @@ def test_flatten():
for node in f.maker.fgraph.toposort()] for node in f.maker.fgraph.toposort()]
f = theano.function([m], m.flatten(ndim=2), mode=mode_with_gpu) f = theano.function([m], m.flatten(ndim=2), mode=mode_with_gpu)
val = numpy.random.rand(10, 11).astype("float32") val = np.random.rand(10, 11).astype("float32")
res = f(val) res = f(val)
utt.assert_allclose(res, val) utt.assert_allclose(res, val)
assert res.shape == val.shape assert res.shape == val.shape
...@@ -102,7 +102,7 @@ def test_flatten(): ...@@ -102,7 +102,7 @@ def test_flatten():
m = theano.tensor.tensor3() m = theano.tensor.tensor3()
f = theano.function([m], m.flatten(ndim=2), mode=mode_with_gpu) f = theano.function([m], m.flatten(ndim=2), mode=mode_with_gpu)
val = numpy.random.rand(10, 11, 12).astype("float32") val = np.random.rand(10, 11, 12).astype("float32")
res = f(val) res = f(val)
utt.assert_allclose(res, val.reshape(10, -1)) utt.assert_allclose(res, val.reshape(10, -1))
assert res.shape == val.reshape(10, -1).shape assert res.shape == val.reshape(10, -1).shape
...@@ -120,7 +120,7 @@ def test_reduce(): ...@@ -120,7 +120,7 @@ def test_reduce():
f = theano.function([m], getattr(m, method)(axis=0, f = theano.function([m], getattr(m, method)(axis=0,
**param), **param),
mode=mode_with_gpu) mode=mode_with_gpu)
val = numpy.random.rand(10, 11).astype("float32") val = np.random.rand(10, 11).astype("float32")
res = f(val) res = f(val)
utt.assert_allclose(res, getattr(val, method)(axis=0)) utt.assert_allclose(res, getattr(val, method)(axis=0))
assert res.shape == (11,) assert res.shape == (11,)
...@@ -135,9 +135,9 @@ def test_reduce(): ...@@ -135,9 +135,9 @@ def test_reduce():
def test_local_gpualloc_memset_0(): def test_local_gpualloc_memset_0():
i = theano.tensor.iscalar() i = theano.tensor.iscalar()
z = numpy.zeros((1,), dtype='float32') z = np.zeros((1,), dtype='float32')
o = numpy.ones((1,), dtype='float32') o = np.ones((1,), dtype='float32')
ones = numpy.ones((2,), dtype='float32') ones = np.ones((2,), dtype='float32')
# Test with 0 from CPU op. # Test with 0 from CPU op.
# Should not be transferred as the only client is the output # Should not be transferred as the only client is the output
...@@ -146,7 +146,7 @@ def test_local_gpualloc_memset_0(): ...@@ -146,7 +146,7 @@ def test_local_gpualloc_memset_0():
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert len(topo) == 1 assert len(topo) == 1
assert isinstance(topo[0].op, theano.tensor.Alloc) assert isinstance(topo[0].op, theano.tensor.Alloc)
assert (numpy.asarray(f(6)) == 0).all() assert (np.asarray(f(6)) == 0).all()
# Test with 0 from CPU op. # Test with 0 from CPU op.
# Should be transferred as it is used by another op. # Should be transferred as it is used by another op.
...@@ -155,7 +155,7 @@ def test_local_gpualloc_memset_0(): ...@@ -155,7 +155,7 @@ def test_local_gpualloc_memset_0():
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert len(topo) == 3 assert len(topo) == 3
assert isinstance(topo[0].op, GpuAlloc) assert isinstance(topo[0].op, GpuAlloc)
assert (numpy.asarray(f(6)) == 0).all() assert (np.asarray(f(6)) == 0).all()
# Test with 0 # Test with 0
a = GpuAlloc(test_ctx_name)(z, i) a = GpuAlloc(test_ctx_name)(z, i)
...@@ -163,7 +163,7 @@ def test_local_gpualloc_memset_0(): ...@@ -163,7 +163,7 @@ def test_local_gpualloc_memset_0():
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert len(topo) == 1 assert len(topo) == 1
assert isinstance(topo[0].op, GpuAlloc) and topo[0].op.memset_0 assert isinstance(topo[0].op, GpuAlloc) and topo[0].op.memset_0
assert (numpy.asarray(f(6)) == 0).all() assert (np.asarray(f(6)) == 0).all()
# Test with 1 # Test with 1
a = GpuAlloc(test_ctx_name)(o, i) a = GpuAlloc(test_ctx_name)(o, i)
...@@ -172,7 +172,7 @@ def test_local_gpualloc_memset_0(): ...@@ -172,7 +172,7 @@ def test_local_gpualloc_memset_0():
assert len(topo) == 1 assert len(topo) == 1
assert isinstance(topo[0].op, GpuAlloc) assert isinstance(topo[0].op, GpuAlloc)
assert not topo[0].op.memset_0 assert not topo[0].op.memset_0
assert (numpy.asarray(f(6)) == 1).all() assert (np.asarray(f(6)) == 1).all()
# Test with 1, 1 # Test with 1, 1
a = GpuAlloc(test_ctx_name)(ones, i) a = GpuAlloc(test_ctx_name)(ones, i)
...@@ -181,7 +181,7 @@ def test_local_gpualloc_memset_0(): ...@@ -181,7 +181,7 @@ def test_local_gpualloc_memset_0():
assert len(topo) == 1 assert len(topo) == 1
assert isinstance(topo[0].op, GpuAlloc) assert isinstance(topo[0].op, GpuAlloc)
assert not topo[0].op.memset_0 assert not topo[0].op.memset_0
assert (numpy.asarray(f(2)) == 1).all() assert (np.asarray(f(2)) == 1).all()
def test_local_gpualloc_empty(): def test_local_gpualloc_empty():
...@@ -219,7 +219,7 @@ def test_local_gpualloc_empty(): ...@@ -219,7 +219,7 @@ def test_local_gpualloc_empty():
def test_rebroadcast(): def test_rebroadcast():
d = numpy.random.rand(10, 10).astype('float32') d = np.random.rand(10, 10).astype('float32')
v = theano.tensor.fmatrix() v = theano.tensor.fmatrix()
up = tensor.unbroadcast(v.sum().dimshuffle('x', 'x'), 0, 1) up = tensor.unbroadcast(v.sum().dimshuffle('x', 'x'), 0, 1)
f = theano.function([v], [up], mode=mode_with_gpu) f = theano.function([v], [up], mode=mode_with_gpu)
...@@ -257,14 +257,14 @@ class test_gpu_ifelse(test_ifelse.test_ifelse): ...@@ -257,14 +257,14 @@ class test_gpu_ifelse(test_ifelse.test_ifelse):
f = theano.function([x, cond], f = theano.function([x, cond],
theano.ifelse.ifelse(cond, x.mean(), x.sum()), theano.ifelse.ifelse(cond, x.mean(), x.sum()),
mode=mode_with_gpu) mode=mode_with_gpu)
assert f(numpy.float32([1, 2, 3]), 0) == 6 assert f(np.float32([1, 2, 3]), 0) == 6
x = tensor.vector() x = tensor.vector()
cond = tensor.scalar() cond = tensor.scalar()
f = theano.function([x, cond], f = theano.function([x, cond],
theano.ifelse.ifelse(cond, x.mean(), x.sum()), theano.ifelse.ifelse(cond, x.mean(), x.sum()),
mode=mode_with_gpu) mode=mode_with_gpu)
assert f(numpy.float32([1, 2, 3]), 0) == 6 assert f(np.float32([1, 2, 3]), 0) == 6
def test_print_op(): def test_print_op():
...@@ -277,7 +277,7 @@ def test_print_op(): ...@@ -277,7 +277,7 @@ def test_print_op():
assert isinstance(topo[1].op, theano.printing.Print) assert isinstance(topo[1].op, theano.printing.Print)
assert isinstance(topo[2].op, GpuElemwise) assert isinstance(topo[2].op, GpuElemwise)
assert topo[3].op == host_from_gpu assert topo[3].op == host_from_gpu
f(numpy.random.random((5, 5)).astype('float32')) f(np.random.random((5, 5)).astype('float32'))
def test_pdbbreakpoint_op(): def test_pdbbreakpoint_op():
...@@ -306,7 +306,7 @@ def test_local_gpu_elemwise_careduce(): ...@@ -306,7 +306,7 @@ def test_local_gpu_elemwise_careduce():
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert len(topo) == 3 assert len(topo) == 3
assert topo[1].op.pre_scalar_op == theano.scalar.sqr assert topo[1].op.pre_scalar_op == theano.scalar.sqr
data = numpy.random.rand(3, 4).astype(theano.config.floatX) data = np.random.rand(3, 4).astype(theano.config.floatX)
utt.assert_allclose(f(data), (data * data).sum()) utt.assert_allclose(f(data), (data * data).sum())
o = (x * x).sum(axis=1) o = (x * x).sum(axis=1)
...@@ -328,15 +328,15 @@ def test_local_lift_dot22scalar(): ...@@ -328,15 +328,15 @@ def test_local_lift_dot22scalar():
for n in f_gpu.maker.fgraph.apply_nodes) for n in f_gpu.maker.fgraph.apply_nodes)
assert any(isinstance(n.op, GpuGemm) assert any(isinstance(n.op, GpuGemm)
for n in f_gpu.maker.fgraph.apply_nodes) for n in f_gpu.maker.fgraph.apply_nodes)
x_val = numpy.random.random((2, 3)).astype(theano.config.floatX) x_val = np.random.random((2, 3)).astype(theano.config.floatX)
y_val = numpy.random.random((3, 4)).astype(theano.config.floatX) y_val = np.random.random((3, 4)).astype(theano.config.floatX)
a_val = 0.5 a_val = 0.5
utt.assert_allclose(f_cpu(x_val, y_val, a_val), f_gpu(x_val, y_val, a_val)) utt.assert_allclose(f_cpu(x_val, y_val, a_val), f_gpu(x_val, y_val, a_val))
def test_local_gpu_subtensor(): def test_local_gpu_subtensor():
# Test shared forced on CPU. # Test shared forced on CPU.
t = tensor._shared(numpy.zeros(20, "float32")) t = tensor._shared(np.zeros(20, "float32"))
f = theano.function([], t[3:4], mode=mode_with_gpu) f = theano.function([], t[3:4], mode=mode_with_gpu)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert any([type(node.op) is tensor.Subtensor for node in topo]) assert any([type(node.op) is tensor.Subtensor for node in topo])
...@@ -367,7 +367,7 @@ def test_local_gpu_subtensor(): ...@@ -367,7 +367,7 @@ def test_local_gpu_subtensor():
# Test shared forced on CPU and we do computation on the output of # Test shared forced on CPU and we do computation on the output of
# the subtensor. # the subtensor.
t = tensor._shared(numpy.zeros(20, "float32")) t = tensor._shared(np.zeros(20, "float32"))
f = theano.function([], t[3:4] + 1, mode=mode_with_gpu) f = theano.function([], t[3:4] + 1, mode=mode_with_gpu)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert any([type(node.op) is tensor.Subtensor for node in topo]) assert any([type(node.op) is tensor.Subtensor for node in topo])
...@@ -386,9 +386,9 @@ def test_local_gpu_elemwise(): ...@@ -386,9 +386,9 @@ def test_local_gpu_elemwise():
b = tensor.fmatrix() b = tensor.fmatrix()
c = tensor.fmatrix() c = tensor.fmatrix()
a_v = (numpy.random.rand(4, 5) * 10).astype("int8") a_v = (np.random.rand(4, 5) * 10).astype("int8")
b_v = (numpy.random.rand(4, 5) * 10).astype("float32") b_v = (np.random.rand(4, 5) * 10).astype("float32")
c_v = (numpy.random.rand(4, 5) * 10).astype("float32") c_v = (np.random.rand(4, 5) * 10).astype("float32")
# Due to optimization order, this composite is created when all # Due to optimization order, this composite is created when all
# the op are on the gpu. # the op are on the gpu.
...@@ -440,7 +440,7 @@ def test_local_gpu_elemwise(): ...@@ -440,7 +440,7 @@ def test_local_gpu_elemwise():
utt.assert_allclose(out[1], a_v * c_v) utt.assert_allclose(out[1], a_v * c_v)
# Test non-contiguous input # Test non-contiguous input
c = gpuarray_shared_constructor(numpy.asarray(c_v, dtype='float32')) c = gpuarray_shared_constructor(np.asarray(c_v, dtype='float32'))
f = theano.function([a, b], outs_op(a[::2], b[::2], c[::2]), f = theano.function([a, b], outs_op(a[::2], b[::2], c[::2]),
mode=mode_with_gpu) mode=mode_with_gpu)
out = f(a_v, b_v) out = f(a_v, b_v)
...@@ -462,7 +462,7 @@ def test_local_lift_abstractconv_gpu_shape(): ...@@ -462,7 +462,7 @@ def test_local_lift_abstractconv_gpu_shape():
def test_local_assert_no_cpu_op(): def test_local_assert_no_cpu_op():
rng = numpy.random.RandomState(utt.fetch_seed()) rng = np.random.RandomState(utt.fetch_seed())
m = rng.uniform(-1, 1, (10, 10)).astype("float32") m = rng.uniform(-1, 1, (10, 10)).astype("float32")
ms = gpuarray_shared_constructor(m, name="m_shared") ms = gpuarray_shared_constructor(m, name="m_shared")
out = theano.tensor.tanh(ms).dot(ms.T) out = theano.tensor.tanh(ms).dot(ms.T)
...@@ -512,6 +512,6 @@ def test_local_lift_solve(): ...@@ -512,6 +512,6 @@ def test_local_lift_solve():
for n in f_gpu.maker.fgraph.apply_nodes) for n in f_gpu.maker.fgraph.apply_nodes)
assert any(isinstance(n.op, GpuCusolverSolve) assert any(isinstance(n.op, GpuCusolverSolve)
for n in f_gpu.maker.fgraph.apply_nodes) for n in f_gpu.maker.fgraph.apply_nodes)
A_val = numpy.random.uniform(-0.4, 0.4, (5, 5)).astype("float32") A_val = np.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
b_val = numpy.random.uniform(-0.4, 0.4, (5, 3)).astype("float32") b_val = np.random.uniform(-0.4, 0.4, (5, 3)).astype("float32")
utt.assert_allclose(f_cpu(A_val, b_val), f_gpu(A_val, b_val)) utt.assert_allclose(f_cpu(A_val, b_val), f_gpu(A_val, b_val))
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论