提交 c1366d70 authored 作者: lamblin's avatar lamblin

Merge pull request #590 from nouiz/test_fix

Test fix
...@@ -59,7 +59,7 @@ echo "Number of elements in the compiledir:" ...@@ -59,7 +59,7 @@ echo "Number of elements in the compiledir:"
ls ${COMPILEDIR}|wc -l ls ${COMPILEDIR}|wc -l
echo "Executing nosetests with mode=FAST_RUN" echo "Executing nosetests with mode=FAST_RUN"
THEANO_FLAGS=${FLAGS},mode=FAST_RUN ${NOSETESTS} ${PROFILING} ${ARGS} THEANO_FLAGS=cmodule.warn_no_version=True,${FLAGS},mode=FAST_RUN ${NOSETESTS} ${PROFILING} ${ARGS}
echo "Number of elements in the compiledir:" echo "Number of elements in the compiledir:"
ls ${COMPILEDIR}|wc -l ls ${COMPILEDIR}|wc -l
......
...@@ -5,7 +5,8 @@ import theano.misc.pycuda_init ...@@ -5,7 +5,8 @@ import theano.misc.pycuda_init
if not theano.misc.pycuda_init.pycuda_available: if not theano.misc.pycuda_init.pycuda_available:
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
raise SkipTest("Pycuda not installed. Skip test of theano op with pycuda code.") raise SkipTest("Pycuda not installed. Skip test of theano op"
" with pycuda code.")
import theano.sandbox.cuda as cuda_ndarray import theano.sandbox.cuda as cuda_ndarray
if cuda_ndarray.cuda_available == False: if cuda_ndarray.cuda_available == False:
...@@ -14,71 +15,93 @@ if cuda_ndarray.cuda_available == False: ...@@ -14,71 +15,93 @@ if cuda_ndarray.cuda_available == False:
import theano import theano
import theano.tensor as T import theano.tensor as T
from theano.misc.pycuda_example import PycudaElemwiseSourceModuleOp, PycudaElemwiseKernelOp, PycudaElemwiseSourceModuleMakeThunkOp from theano.misc.pycuda_example import (PycudaElemwiseSourceModuleOp,
# PycudaElemwiseKernelOp,
PycudaElemwiseSourceModuleMakeThunkOp)
if theano.config.mode=='FAST_COMPILE': if theano.config.mode == 'FAST_COMPILE':
mode_with_gpu = theano.compile.mode.get_mode('FAST_RUN').including('gpu') mode_with_gpu = theano.compile.mode.get_mode('FAST_RUN').including('gpu')
mode_without_gpu = theano.compile.mode.get_mode('FAST_RUN').excluding('gpu') mode_without_gpu = theano.compile.mode.get_mode(
'FAST_RUN').excluding('gpu')
else: else:
mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu') mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
mode_without_gpu = theano.compile.mode.get_default_mode().excluding('gpu') mode_without_gpu = theano.compile.mode.get_default_mode().excluding('gpu')
def test_pycuda_elemwise_source_module(): def test_pycuda_elemwise_source_module():
for shape in [(5,5), (10,49), (50,49),(500,501),(5000,5001)]: for shape in [(5, 5), (10, 49), (50, 49), (500, 501)]:
for op in [theano.scalar.basic.mul, theano.scalar.basic.add]: for op in [theano.scalar.basic.mul, theano.scalar.basic.add]:
x=T.fmatrix('x') x = T.fmatrix('x')
y=T.fmatrix('y') y = T.fmatrix('y')
elemwise_op = theano.tensor.Elemwise(op) elemwise_op = theano.tensor.Elemwise(op)
pycuda_op = PycudaElemwiseSourceModuleOp(op) pycuda_op = PycudaElemwiseSourceModuleOp(op)
pycuda_op_thunk = PycudaElemwiseSourceModuleMakeThunkOp(op) pycuda_op_thunk = PycudaElemwiseSourceModuleMakeThunkOp(op)
f=theano.function([x,y], elemwise_op(x,y), mode=mode_with_gpu) f = theano.function([x, y], elemwise_op(x, y), mode=mode_with_gpu)
f2 = theano.function([x,y], theano.sandbox.cuda.host_from_gpu(pycuda_op(x,y))) f2 = theano.function([x, y],
f3 = theano.function([x,y], elemwise_op(x,y), theano.sandbox.cuda.host_from_gpu(
mode=mode_with_gpu.including("local_pycuda_gpu_elemwise")) pycuda_op(x, y)),
f4 = theano.function([x,y], theano.sandbox.cuda.host_from_gpu(pycuda_op_thunk(x,y))) mode=mode_with_gpu)
mode_pycuda = mode_with_gpu.including("local_pycuda_gpu_elemwise")
f3 = theano.function([x, y], elemwise_op(x, y),
mode=mode_pycuda)
f4 = theano.function([x, y],
theano.sandbox.cuda.host_from_gpu(
pycuda_op_thunk(x, y)),
mode=mode_with_gpu)
assert any([ isinstance(node.op, theano.sandbox.cuda.GpuElemwise) for node in f.maker.env.toposort()]) assert any([isinstance(node.op, theano.sandbox.cuda.GpuElemwise)
assert any([ isinstance(node.op, PycudaElemwiseSourceModuleOp) for node in f2.maker.env.toposort()]) for node in f.maker.env.toposort()])
assert any([ isinstance(node.op, PycudaElemwiseSourceModuleOp) for node in f3.maker.env.toposort()]) assert any([isinstance(node.op, PycudaElemwiseSourceModuleOp)
assert any([ isinstance(node.op, PycudaElemwiseSourceModuleMakeThunkOp) for node in f4.maker.env.toposort()]) for node in f2.maker.env.toposort()])
assert any([isinstance(node.op, PycudaElemwiseSourceModuleOp)
for node in f3.maker.env.toposort()])
assert any([isinstance(node.op,
PycudaElemwiseSourceModuleMakeThunkOp)
for node in f4.maker.env.toposort()])
val1 = numpy.asarray(numpy.random.rand(*shape), dtype='float32') val1 = numpy.asarray(numpy.random.rand(*shape), dtype='float32')
val2 = numpy.asarray(numpy.random.rand(*shape), dtype='float32') val2 = numpy.asarray(numpy.random.rand(*shape), dtype='float32')
assert (f(val1,val2) == f2(val1,val2)).all() assert (f(val1, val2) == f2(val1, val2)).all()
assert (f(val1,val2) == f3(val1,val2)).all() assert (f(val1, val2) == f3(val1, val2)).all()
assert (f(val1,val2) == f4(val1,val2)).all() assert (f(val1, val2) == f4(val1, val2)).all()
#print f(val1,val2) #print f(val1,val2)
#print f2(val1,val2) #print f2(val1,val2)
"""
# Commented out because it only works with old pycuda versions.
def test_pycuda_elemwise_kernel(): def test_pycuda_elemwise_kernel():
x=T.fmatrix('x') x = T.fmatrix('x')
y=T.fmatrix('y') y = T.fmatrix('y')
f=theano.function([x,y],x+y, mode=mode_with_gpu) f = theano.function([x, y], x + y, mode=mode_with_gpu)
print f.maker.env.toposort() print f.maker.env.toposort()
f2 = theano.function([x,y],x+y, mode=mode_with_gpu.including("local_pycuda_gpu_elemwise_kernel")) mode_pycuda = mode_with_gpu.including("local_pycuda_gpu_elemwise_kernel")
f2 = theano.function([x, y], x + y, mode=mode_pycuda)
print f2.maker.env.toposort() print f2.maker.env.toposort()
assert any([ isinstance(node.op, theano.sandbox.cuda.GpuElemwise) for node in f.maker.env.toposort()]) assert any([isinstance(node.op, theano.sandbox.cuda.GpuElemwise)
assert any([ isinstance(node.op, PycudaElemwiseKernelOp) for node in f2.maker.env.toposort()]) for node in f.maker.env.toposort()])
assert any([isinstance(node.op, PycudaElemwiseKernelOp)
for node in f2.maker.env.toposort()])
val1 = numpy.asarray(numpy.random.rand(5,5), dtype='float32') val1 = numpy.asarray(numpy.random.rand(5, 5), dtype='float32')
val2 = numpy.asarray(numpy.random.rand(5,5), dtype='float32') val2 = numpy.asarray(numpy.random.rand(5, 5), dtype='float32')
#val1 = numpy.ones((5,5)) #val1 = numpy.ones((5,5))
#val2 = numpy.arange(25).reshape(5,5) #val2 = numpy.arange(25).reshape(5,5)
assert (f(val1,val2) == f2(val1,val2)).all() assert (f(val1, val2) == f2(val1, val2)).all()
print f(val1,val2) print f(val1, val2)
print f2(val1,val2) print f2(val1, val2)
x3=T.ftensor3('x') x3 = T.ftensor3('x')
y3=T.ftensor3('y') y3 = T.ftensor3('y')
z3=T.ftensor3('y') z3 = T.ftensor3('y')
f4 = theano.function([x3,y3,z3],x3*y3+z3, mode=mode_with_gpu.including("local_pycuda_gpu_elemwise_kernel")) f4 = theano.function([x3, y3, z3], x3 * y3 + z3, mode=mode_pycuda)
print f4.maker.env.toposort() print f4.maker.env.toposort()
assert any([ isinstance(node.op, PycudaElemwiseKernelOp) for node in f4.maker.env.toposort()]) assert any([isinstance(node.op, PycudaElemwiseKernelOp)
for node in f4.maker.env.toposort()])
val1 = numpy.random.rand(2,2,2) val1 = numpy.random.rand(2, 2, 2)
print val1 print val1
print f4(val1,val1,val1) print f4(val1, val1, val1)
assert numpy.allclose(f4(val1,val1,val1),val1*val1+val1) assert numpy.allclose(f4(val1, val1, val1), val1 * val1 + val1)
"""
...@@ -78,7 +78,10 @@ __global__ void multiply_them(float *dest, float *a, float *b) ...@@ -78,7 +78,10 @@ __global__ void multiply_them(float *dest, float *a, float *b)
def test_pycuda_memory_to_theano(): def test_pycuda_memory_to_theano():
#Test that we can use the GpuArray memory space in pycuda in a CudaNdarray #Test that we can use the GpuArray memory space in pycuda in a CudaNdarray
y = pycuda.gpuarray.zeros((3, 4, 5), 'float32') y = pycuda.gpuarray.zeros((3, 4, 5), 'float32')
print numpy.asarray(y) print sys.getrefcount(y)
# This increases the ref count with newer pycuda. Does pycuda also
# cache ndarray?
# print y.get()
print "gpuarray ref count before creating a CudaNdarray", print "gpuarray ref count before creating a CudaNdarray",
print sys.getrefcount(y) print sys.getrefcount(y)
assert sys.getrefcount(y) == 2 assert sys.getrefcount(y) == 2
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论