提交 c1366d70 authored 作者: lamblin's avatar lamblin

Merge pull request #590 from nouiz/test_fix

Test fix
......@@ -59,7 +59,7 @@ echo "Number of elements in the compiledir:"
ls ${COMPILEDIR}|wc -l
echo "Executing nosetests with mode=FAST_RUN"
THEANO_FLAGS=${FLAGS},mode=FAST_RUN ${NOSETESTS} ${PROFILING} ${ARGS}
THEANO_FLAGS=cmodule.warn_no_version=True,${FLAGS},mode=FAST_RUN ${NOSETESTS} ${PROFILING} ${ARGS}
echo "Number of elements in the compiledir:"
ls ${COMPILEDIR}|wc -l
......
......@@ -5,7 +5,8 @@ import theano.misc.pycuda_init
if not theano.misc.pycuda_init.pycuda_available:
from nose.plugins.skip import SkipTest
raise SkipTest("Pycuda not installed. Skip test of theano op with pycuda code.")
raise SkipTest("Pycuda not installed. Skip test of theano op"
" with pycuda code.")
import theano.sandbox.cuda as cuda_ndarray
if cuda_ndarray.cuda_available == False:
......@@ -14,71 +15,93 @@ if cuda_ndarray.cuda_available == False:
import theano
import theano.tensor as T
from theano.misc.pycuda_example import PycudaElemwiseSourceModuleOp, PycudaElemwiseKernelOp, PycudaElemwiseSourceModuleMakeThunkOp
from theano.misc.pycuda_example import (PycudaElemwiseSourceModuleOp,
# PycudaElemwiseKernelOp,
PycudaElemwiseSourceModuleMakeThunkOp)
if theano.config.mode=='FAST_COMPILE':
if theano.config.mode == 'FAST_COMPILE':
mode_with_gpu = theano.compile.mode.get_mode('FAST_RUN').including('gpu')
mode_without_gpu = theano.compile.mode.get_mode('FAST_RUN').excluding('gpu')
mode_without_gpu = theano.compile.mode.get_mode(
'FAST_RUN').excluding('gpu')
else:
mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
mode_without_gpu = theano.compile.mode.get_default_mode().excluding('gpu')
def test_pycuda_elemwise_source_module():
for shape in [(5,5), (10,49), (50,49),(500,501),(5000,5001)]:
for shape in [(5, 5), (10, 49), (50, 49), (500, 501)]:
for op in [theano.scalar.basic.mul, theano.scalar.basic.add]:
x=T.fmatrix('x')
y=T.fmatrix('y')
x = T.fmatrix('x')
y = T.fmatrix('y')
elemwise_op = theano.tensor.Elemwise(op)
pycuda_op = PycudaElemwiseSourceModuleOp(op)
pycuda_op_thunk = PycudaElemwiseSourceModuleMakeThunkOp(op)
f=theano.function([x,y], elemwise_op(x,y), mode=mode_with_gpu)
f2 = theano.function([x,y], theano.sandbox.cuda.host_from_gpu(pycuda_op(x,y)))
f3 = theano.function([x,y], elemwise_op(x,y),
mode=mode_with_gpu.including("local_pycuda_gpu_elemwise"))
f4 = theano.function([x,y], theano.sandbox.cuda.host_from_gpu(pycuda_op_thunk(x,y)))
f = theano.function([x, y], elemwise_op(x, y), mode=mode_with_gpu)
f2 = theano.function([x, y],
theano.sandbox.cuda.host_from_gpu(
pycuda_op(x, y)),
mode=mode_with_gpu)
mode_pycuda = mode_with_gpu.including("local_pycuda_gpu_elemwise")
f3 = theano.function([x, y], elemwise_op(x, y),
mode=mode_pycuda)
f4 = theano.function([x, y],
theano.sandbox.cuda.host_from_gpu(
pycuda_op_thunk(x, y)),
mode=mode_with_gpu)
assert any([ isinstance(node.op, theano.sandbox.cuda.GpuElemwise) for node in f.maker.env.toposort()])
assert any([ isinstance(node.op, PycudaElemwiseSourceModuleOp) for node in f2.maker.env.toposort()])
assert any([ isinstance(node.op, PycudaElemwiseSourceModuleOp) for node in f3.maker.env.toposort()])
assert any([ isinstance(node.op, PycudaElemwiseSourceModuleMakeThunkOp) for node in f4.maker.env.toposort()])
assert any([isinstance(node.op, theano.sandbox.cuda.GpuElemwise)
for node in f.maker.env.toposort()])
assert any([isinstance(node.op, PycudaElemwiseSourceModuleOp)
for node in f2.maker.env.toposort()])
assert any([isinstance(node.op, PycudaElemwiseSourceModuleOp)
for node in f3.maker.env.toposort()])
assert any([isinstance(node.op,
PycudaElemwiseSourceModuleMakeThunkOp)
for node in f4.maker.env.toposort()])
val1 = numpy.asarray(numpy.random.rand(*shape), dtype='float32')
val2 = numpy.asarray(numpy.random.rand(*shape), dtype='float32')
assert (f(val1,val2) == f2(val1,val2)).all()
assert (f(val1,val2) == f3(val1,val2)).all()
assert (f(val1,val2) == f4(val1,val2)).all()
assert (f(val1, val2) == f2(val1, val2)).all()
assert (f(val1, val2) == f3(val1, val2)).all()
assert (f(val1, val2) == f4(val1, val2)).all()
#print f(val1,val2)
#print f2(val1,val2)
"""
#commented as it work only with old pycuda version.
def test_pycuda_elemwise_kernel():
x=T.fmatrix('x')
y=T.fmatrix('y')
f=theano.function([x,y],x+y, mode=mode_with_gpu)
x = T.fmatrix('x')
y = T.fmatrix('y')
f = theano.function([x, y], x + y, mode=mode_with_gpu)
print f.maker.env.toposort()
f2 = theano.function([x,y],x+y, mode=mode_with_gpu.including("local_pycuda_gpu_elemwise_kernel"))
mode_pycuda = mode_with_gpu.including("local_pycuda_gpu_elemwise_kernel")
f2 = theano.function([x, y], x + y, mode=mode_pycuda)
print f2.maker.env.toposort()
assert any([ isinstance(node.op, theano.sandbox.cuda.GpuElemwise) for node in f.maker.env.toposort()])
assert any([ isinstance(node.op, PycudaElemwiseKernelOp) for node in f2.maker.env.toposort()])
assert any([isinstance(node.op, theano.sandbox.cuda.GpuElemwise)
for node in f.maker.env.toposort()])
assert any([isinstance(node.op, PycudaElemwiseKernelOp)
for node in f2.maker.env.toposort()])
val1 = numpy.asarray(numpy.random.rand(5,5), dtype='float32')
val2 = numpy.asarray(numpy.random.rand(5,5), dtype='float32')
val1 = numpy.asarray(numpy.random.rand(5, 5), dtype='float32')
val2 = numpy.asarray(numpy.random.rand(5, 5), dtype='float32')
#val1 = numpy.ones((5,5))
#val2 = numpy.arange(25).reshape(5,5)
assert (f(val1,val2) == f2(val1,val2)).all()
print f(val1,val2)
print f2(val1,val2)
assert (f(val1, val2) == f2(val1, val2)).all()
print f(val1, val2)
print f2(val1, val2)
x3=T.ftensor3('x')
y3=T.ftensor3('y')
z3=T.ftensor3('y')
x3 = T.ftensor3('x')
y3 = T.ftensor3('y')
z3 = T.ftensor3('y')
f4 = theano.function([x3,y3,z3],x3*y3+z3, mode=mode_with_gpu.including("local_pycuda_gpu_elemwise_kernel"))
f4 = theano.function([x3, y3, z3], x3 * y3 + z3, mode=mode_pycuda)
print f4.maker.env.toposort()
assert any([ isinstance(node.op, PycudaElemwiseKernelOp) for node in f4.maker.env.toposort()])
assert any([isinstance(node.op, PycudaElemwiseKernelOp)
for node in f4.maker.env.toposort()])
val1 = numpy.random.rand(2,2,2)
val1 = numpy.random.rand(2, 2, 2)
print val1
print f4(val1,val1,val1)
assert numpy.allclose(f4(val1,val1,val1),val1*val1+val1)
print f4(val1, val1, val1)
assert numpy.allclose(f4(val1, val1, val1), val1 * val1 + val1)
"""
......@@ -78,7 +78,10 @@ __global__ void multiply_them(float *dest, float *a, float *b)
def test_pycuda_memory_to_theano():
#Test that we can use the GpuArray memory space in pycuda in a CudaNdarray
y = pycuda.gpuarray.zeros((3, 4, 5), 'float32')
print numpy.asarray(y)
print sys.getrefcount(y)
# This increase the ref count with never pycuda. Do pycuda also
# cache ndarray?
# print y.get()
print "gpuarray ref count before creating a CudaNdarray",
print sys.getrefcount(y)
assert sys.getrefcount(y) == 2
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论