提交 3a6d2fcb authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Fixup tutorial/using_gpu.txt

上级 a3d76ad2
...@@ -33,9 +33,6 @@ Testing Theano with GPU ...@@ -33,9 +33,6 @@ Testing Theano with GPU
To see if your GPU is being used, cut and paste the following program into a
file and run it.
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_using_gpu.test_using_gpu_1
.. testcode::
from theano import function, config, shared, sandbox
...@@ -111,9 +108,6 @@ the graph to express a computation with a GPU-stored result. The ``gpu_from_hos ...@@ -111,9 +108,6 @@ the graph to express a computation with a GPU-stored result. The ``gpu_from_hos
op means "copy the input from the host to the GPU" and it is optimized away
after the ``T.exp(x)`` is replaced by a GPU version of ``exp()``.
.. If you modify this code, also change :
.. theano/tests/test_tutorial.py:T_using_gpu.test_using_gpu_2
.. testcode::
from theano import function, config, shared, sandbox
...@@ -466,7 +460,6 @@ If you don't mind a loss of flexibility, you can ask theano to return ...@@ -466,7 +460,6 @@ If you don't mind a loss of flexibility, you can ask theano to return
the GPU object directly. The following code is modified to do just that.
.. testcode::
:emphasize-lines: 10,17
from theano import function, config, shared, tensor, sandbox
import numpy
...@@ -501,14 +494,13 @@ The output is ...@@ -501,14 +494,13 @@ The output is
.. testoutput::
:hide:
:options: +ELLIPSIS, +SKIP
Using device cuda0: ...
[GpuElemwise{exp,no_inplace}(<GpuArray<float64>>)]
Looping 1000 times took ... seconds
Result is ...
Used the gpu
.. code-block:: none
......
...@@ -21,154 +21,6 @@ from theano.tests import unittest_tools as utt ...@@ -21,154 +21,6 @@ from theano.tests import unittest_tools as utt
from theano.sandbox.rng_mrg import MRG_RandomStreams from theano.sandbox.rng_mrg import MRG_RandomStreams
from theano.tensor.shared_randomstreams import RandomStreams from theano.tensor.shared_randomstreams import RandomStreams
class T_using_gpu(unittest.TestCase):
    """Tests mirroring the code samples of the ``using_gpu`` tutorial.

    All tests here belong to
    http://deeplearning.net/software/theano/tutorial/using_gpu.html
    (Theano/doc/tutorial/using_gpu.txt).
    Any change made here must also be applied to the tutorial!
    """

    def test_using_gpu_1(self):
        # Smoke test: the tutorial's first example compiles and runs.
        # When a GPU device is configured, the plain Elemwise exp node must
        # have been replaced by its GPU counterpart in the compiled graph.
        from theano import function, config, shared, sandbox
        import theano.tensor as T
        import numpy
        import time

        vlen = 10 * 30 * 70  # 10 x #cores x # threads per core
        iters = 10

        rng = numpy.random.RandomState(22)
        x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
        f = function([], T.exp(x))
        # print f.maker.fgraph.toposort()
        t0 = time.time()
        # Was ``xrange`` (Python 2 only); ``range`` is equivalent here and
        # matches the Python-3 style ``print()`` calls used below.
        for i in range(iters):
            r = f()
        t1 = time.time()
        print('Looping %d times took' % iters, t1 - t0, 'seconds')
        print('Result is', r)
        # A CPU-only graph still contains plain T.Elemwise nodes; compute the
        # flag once (the original evaluated this comprehension three times,
        # with a loop variable shadowing the shared variable ``x``).
        used_cpu = numpy.any([isinstance(node.op, T.Elemwise)
                              for node in f.maker.fgraph.toposort()])
        if used_cpu:
            print('Used the cpu')
        else:
            print('Used the gpu')
        if theano.config.device.find('gpu') > -1:
            assert not used_cpu
        else:
            assert used_cpu

    def test_using_gpu_2(self):
        # Tutorial's second example: ask for the GPU-stored result directly
        # via ``gpu_from_host``.  Only meaningful when a GPU device is in use.
        if theano.config.device.find('gpu') > -1:
            from theano import function, config, shared, sandbox
            import theano.tensor as T
            import numpy
            import time

            vlen = 10 * 30 * 70  # 10 x #cores x # threads per core
            iters = 10

            rng = numpy.random.RandomState(22)
            x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
            f = function([], sandbox.cuda.basic_ops.gpu_from_host(T.exp(x)))
            # print f.maker.fgraph.toposort()
            t0 = time.time()
            for i in range(iters):  # was ``xrange`` (Python 2 only)
                r = f()
            t1 = time.time()
            print('Looping %d times took' % iters, t1 - t0, 'seconds')
            print('Result is', r)
            print('Numpy result is', numpy.asarray(r))
            used_cpu = numpy.any([isinstance(node.op, T.Elemwise)
                                  for node in f.maker.fgraph.toposort()])
            if used_cpu:
                print('Used the cpu')
            else:
                print('Used the gpu')
            # The whole graph must have been moved to the GPU.
            assert not used_cpu

    def test_using_gpu_3(self):
        # Tutorial's third example: additionally wrap the output in
        # ``Out(..., borrow=True)`` to avoid a defensive copy.
        if theano.config.device.find('gpu') > -1:
            from theano import function, config, shared, sandbox, Out
            import theano.tensor as T
            import numpy
            import time

            vlen = 10 * 30 * 70  # 10 x #cores x # threads per core
            iters = 10

            rng = numpy.random.RandomState(22)
            x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
            f = function([],
                         Out(sandbox.cuda.basic_ops.gpu_from_host(T.exp(x)),
                             borrow=True))
            # print f.maker.fgraph.toposort()
            t0 = time.time()
            for i in range(iters):  # was ``xrange`` (Python 2 only)
                r = f()
            t1 = time.time()
            print('Looping %d times took' % iters, t1 - t0, 'seconds')
            print('Result is', r)
            print('Numpy result is', numpy.asarray(r))
            used_cpu = numpy.any([isinstance(node.op, T.Elemwise)
                                  for node in f.maker.fgraph.toposort()])
            if used_cpu:
                print('Used the cpu')
            else:
                print('Used the gpu')
            assert not used_cpu

    def test_using_gpu_pycudaop(self):
        # Tutorial example of a custom Theano Op whose thunk launches a
        # hand-written PyCUDA kernel (doubles every element of the input).
        # NOTE(review): relies on module-level ``numpy``/``SkipTest`` imports
        # above this class (not visible here) -- confirm they exist.
        import theano.misc.pycuda_init
        if not theano.misc.pycuda_init.pycuda_available:
            raise SkipTest("Pycuda not installed. Skip test of theano op"
                           " with pycuda code.")
        from pycuda.compiler import SourceModule
        import theano.sandbox.cuda as cuda
        import theano.sandbox.cuda as cuda_ndarray
        if not cuda_ndarray.cuda_available:
            raise SkipTest('Optional package cuda disabled')

        class PyCUDADoubleOp(theano.Op):
            __props__ = ()

            def make_node(self, inp):
                # Move the input to the GPU and make it C-contiguous so the
                # raw kernel can index it linearly; float32 only.
                inp = cuda.basic_ops.gpu_contiguous(
                    cuda.basic_ops.as_cuda_ndarray_variable(inp))
                assert inp.dtype == "float32"
                return theano.Apply(self, [inp], [inp.type()])

            def make_thunk(self, node, storage_map, _, _2):
                mod = SourceModule("""
  __global__ void my_fct(float * i0, float * o0, int size) {
    int i = blockIdx.x*blockDim.x + threadIdx.x;
    if(i<size){
        o0[i] = i0[i]*2;
    }
  }""")
                pycuda_fct = mod.get_function("my_fct")
                inputs = [storage_map[v] for v in node.inputs]
                outputs = [storage_map[v] for v in node.outputs]

                def thunk():
                    z = outputs[0]
                    # Reallocate the output only when missing or mis-shaped,
                    # so repeated calls reuse the same GPU buffer.
                    if z[0] is None or z[0].shape != inputs[0][0].shape:
                        z[0] = cuda.CudaNdarray.zeros(inputs[0][0].shape)
                    grid = (int(numpy.ceil(inputs[0][0].size / 512.)), 1)
                    pycuda_fct(inputs[0][0], z[0],
                               numpy.intc(inputs[0][0].size),
                               block=(512, 1, 1), grid=grid)
                return thunk

        x = theano.tensor.fmatrix()
        f = theano.function([x], PyCUDADoubleOp()(x))
        xv = numpy.ones((4, 5), dtype="float32")
        assert numpy.allclose(f(xv), xv * 2)
class T_typedlist(unittest.TestCase): class T_typedlist(unittest.TestCase):
# All tests here belong to # All tests here belong to
# http://deeplearning.net/software/theano/library/typed_list.html # http://deeplearning.net/software/theano/library/typed_list.html
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论