提交 46a7640c authored 作者: nouiz's avatar nouiz

Merge pull request #724 from nouiz/small

Small
...@@ -721,9 +721,10 @@ class numeric_grad(object): ...@@ -721,9 +721,10 @@ class numeric_grad(object):
return (max_arg, pos[max_arg], abs_errs[max_arg], rel_errs[max_arg]) return (max_arg, pos[max_arg], abs_errs[max_arg], rel_errs[max_arg])
def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, out_type=None, abs_tol=None, def verify_grad(fun, pt, n_tests=2, rng=None, eps=None,
out_type=None, abs_tol=None,
rel_tol=None, mode=None, cast_to_output_type=False): rel_tol=None, mode=None, cast_to_output_type=False):
""" Test a gradient by Finite Difference Method. Raise error on failure. """Test a gradient by Finite Difference Method. Raise error on failure.
Example: Example:
>>> verify_grad(theano.tensor.tanh, >>> verify_grad(theano.tensor.tanh,
...@@ -745,6 +746,10 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, out_type=None, abs_tol=N ...@@ -745,6 +746,10 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, out_type=None, abs_tol=N
of sum(u * fun) at pt of sum(u * fun) at pt
:param eps: stepsize used in the Finite Difference Method (Default :param eps: stepsize used in the Finite Difference Method (Default
None is type-dependent) None is type-dependent)
Raising the value of eps can raise or lower the absolute and
relative error of the verification depending on the
Op. Raising eps does not lower the verification quality. It
is better to raise eps than to raise abs_tol or rel_tol.
:param out_type: dtype of output, if complex (i.e. 'complex32' or :param out_type: dtype of output, if complex (i.e. 'complex32' or
'complex64') 'complex64')
:param abs_tol: absolute tolerance used as threshold for gradient :param abs_tol: absolute tolerance used as threshold for gradient
...@@ -757,9 +762,10 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, out_type=None, abs_tol=N ...@@ -757,9 +762,10 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, out_type=None, abs_tol=N
in debug mode, which can be very slow if it has to verify a lot of in debug mode, which can be very slow if it has to verify a lot of
intermediate computations. intermediate computations.
:note: This op does not support multiple outputs. In tests/test_scan.py :note: This function does not support multiple outputs. In
there is an experimental verify_grad that covers that case as well tests/test_scan.py there is an experimental verify_grad that
by using random projections. covers that case as well by using random projections.
""" """
from theano import compile, shared from theano import compile, shared
import theano.tensor import theano.tensor
......
...@@ -187,8 +187,18 @@ if __name__ == "__main__": ...@@ -187,8 +187,18 @@ if __name__ == "__main__":
(cuda version 3.2RC and up have a faster gemm on the Fermi/GTX[45]??) (cuda version 3.2RC and up have a faster gemm on the Fermi/GTX[45]??)
gpu/cuda version gpu/cuda version
GTX580/3.2 0.20s GTX680/4.2 0.154s
GTX480/3.2 0.24s GTX580/4.2 0.164s
GTX480/4.2 0.192s
GTX470/4.2 0.238s
GTX285/4.2 0.452s #cuda 3.0 seems faster? driver version?
GTX580/3.2 0.203s
GTX680/3.2 0.218s
GTX480/3.2 0.237s
GTX470/3.2 0.297s
GTX285/3.2 0.452s #cuda 3.0 seems faster? driver version?
GTX480/3.0 0.27s GTX480/3.0 0.27s
M2070/4.1 0.27s M2070/4.1 0.27s
GTX470/3.2 0.29s GTX470/3.2 0.29s
...@@ -197,6 +207,7 @@ if __name__ == "__main__": ...@@ -197,6 +207,7 @@ if __name__ == "__main__":
GTX285/3.0 0.40s GTX285/3.0 0.40s
C1060/3.2 0.46s C1060/3.2 0.46s
GTX550Ti/4.0 0.57s GTX550Ti/4.0 0.57s
520/3.2 3.06s
520M/3.2 3.19s with bumblebee on Ubuntu 12.04 520M/3.2 3.19s with bumblebee on Ubuntu 12.04
GT220/3.2RC 3.80s GT220/3.2RC 3.80s
GT210/4.0 6.35s GT210/4.0 6.35s
......
...@@ -163,6 +163,13 @@ if compile_cuda_ndarray: ...@@ -163,6 +163,13 @@ if compile_cuda_ndarray:
set_cuda_disabled() set_cuda_disabled()
finally: finally:
release_lock() release_lock()
elif not nvcc_compiler.is_nvcc_available():
# This can happen if cuda_ndarray.so was already compiled
# and then nvcc was removed. In that case we need to disable the CUDA
# back-end, as we won't be able to compile any new op, and we can't
# use only the already compiled GPU ops and skip the others.
set_cuda_disabled()
del compile_cuda_ndarray del compile_cuda_ndarray
if cuda_available: if cuda_available:
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论