提交 9da67d20 authored 作者: abergeron's avatar abergeron

Merge pull request #2153 from nouiz/cuda_tests

[BUILDBOT CRASH] Fix test in FAST_COMPILE.
...@@ -65,6 +65,20 @@ Here is an example output when we disable some Theano optimizations to ...@@ -65,6 +65,20 @@ Here is an example output when we disable some Theano optimizations to
give you a better idea of the difference between sections. With all give you a better idea of the difference between sections. With all
optimizations enabled, there would be only one op left in the graph. optimizations enabled, there would be only one op left in the graph.
.. note::
To profile the peak memory usage on the GPU you need to do::
* In the file theano/sandbox/cuda/cuda_ndarray.cu, set the macro
COMPUTE_GPU_MEM_USED to 1.
* Then call theano.sandbox.cuda.theano_allocated()
It returns a tuple of two ints. The first is the GPU memory
currently allocated by Theano. The second is the peak GPU memory
that was allocated by Theano.
Do not leave this enabled all the time, as it slows down memory
allocation and freeing. Because this slows down the computation, it
also distorts speed profiling, so do not do both at the same time.
to run the example: to run the example:
......
...@@ -563,7 +563,9 @@ def _test_valid(cls, mode=None, extra_shapes=[], version=[-1]): ...@@ -563,7 +563,9 @@ def _test_valid(cls, mode=None, extra_shapes=[], version=[-1]):
def test_valid(): def test_valid():
for t in _test_valid(None, version=[-2, -1, 6]): for t in _test_valid(None,
mode=theano_mode,
version=[-2, -1, 6]):
yield t yield t
...@@ -648,7 +650,9 @@ def _test_full(cls, mode=None, version=[-1], extra_shapes=[]): ...@@ -648,7 +650,9 @@ def _test_full(cls, mode=None, version=[-1], extra_shapes=[]):
def test_full(): def test_full():
for t in _test_full(None, version=[-2, -1, 0, 1, 2, 3, 4, 5]): for t in _test_full(None,
mode=theano_mode,
version=[-2, -1, 0, 1, 2, 3, 4, 5]):
yield t yield t
......
...@@ -551,7 +551,8 @@ def conv3D(V, W, b, d): ...@@ -551,7 +551,8 @@ def conv3D(V, W, b, d):
This is for optimization. This is for optimization.
:note: The GPU implementation is very slow. You should use :note: The GPU implementation is very slow. You should use
:func:`conv3d2d <theano.tensor.nnet.conv3d2d.conv3d>` for a :func:`conv3d2d <theano.tensor.nnet.conv3d2d.conv3d>` or
:func:`conv3d_fft <theano.sandbox.cuda.fftconv.conv3d_fft>` for a
GPU graph instead. GPU graph instead.
:see: Someone made a script that shows how to swap the axes :see: Someone made a script that shows how to swap the axes
......
...@@ -176,7 +176,8 @@ def conv3d(signals, filters, ...@@ -176,7 +176,8 @@ def conv3d(signals, filters,
:note: Another way to define signals: (batch, time, in channel, row, column) :note: Another way to define signals: (batch, time, in channel, row, column)
Another way to define filters: (out channel,time,in channel, row, column) Another way to define filters: (out channel,time,in channel, row, column)
:note: See the `conv3d_fft`_ or `conv3d2d`_ for GPU implementations. :note: For the GPU, you can use this implementation or
:func:`conv3d_fft <theano.sandbox.cuda.fftconv.conv3d_fft>`.
:see: Someone made a script that shows how to swap the axes between :see: Someone made a script that shows how to swap the axes between
both 3d convolution implementations in Theano. See the last both 3d convolution implementations in Theano. See the last
......
...@@ -339,7 +339,7 @@ def register_specialize(lopt, *tags, **kwargs): ...@@ -339,7 +339,7 @@ def register_specialize(lopt, *tags, **kwargs):
else: else:
name = (kwargs and kwargs.pop('name')) or lopt.__name__ name = (kwargs and kwargs.pop('name')) or lopt.__name__
compile.optdb['specialize'].register(name, lopt, 'fast_run', compile.optdb['specialize'].register(name, lopt, 'fast_run',
'fast_compile_gpu', *tags) *tags)
return lopt return lopt
......
...@@ -6430,7 +6430,8 @@ class TestInferShape(utt.InferShapeTester): ...@@ -6430,7 +6430,8 @@ class TestInferShape(utt.InferShapeTester):
self._compile_and_check([advec, bdvec], self._compile_and_check([advec, bdvec],
[Dot()(advec, bdvec)], [Dot()(advec, bdvec)],
[advec_val, bdvec_val], [advec_val, bdvec_val],
(Dot, tensor.blas.Gemv, tensor.blas_c.CGemv)) (Dot, tensor.blas.Dot22,
tensor.blas.Gemv, tensor.blas_c.CGemv))
#mat/mat #mat/mat
admat = dmatrix() admat = dmatrix()
...@@ -6447,14 +6448,16 @@ class TestInferShape(utt.InferShapeTester): ...@@ -6447,14 +6448,16 @@ class TestInferShape(utt.InferShapeTester):
self._compile_and_check([advec, bdmat], self._compile_and_check([advec, bdmat],
[Dot()(advec, bdmat)], [Dot()(advec, bdmat)],
[advec_val, bdmat_val], [advec_val, bdmat_val],
(Dot, tensor.blas.Gemv, tensor.blas_c.CGemv)) (Dot, tensor.blas.Dot22,
tensor.blas.Gemv, tensor.blas_c.CGemv))
#mat/vec #mat/vec
admat_val = rand(5, 4) admat_val = rand(5, 4)
self._compile_and_check([admat, bdvec], self._compile_and_check([admat, bdvec],
[Dot()(admat, bdvec)], [Dot()(admat, bdvec)],
[admat_val, bdvec_val], [admat_val, bdvec_val],
(Dot, tensor.blas.Gemv, tensor.blas_c.CGemv)) (Dot, tensor.blas.Dot22,
tensor.blas.Gemv, tensor.blas_c.CGemv))
# Split # Split
aivec = ivector() aivec = ivector()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论