提交 46a7640c authored 作者: nouiz's avatar nouiz

Merge pull request #724 from nouiz/small

Small
...@@ -721,9 +721,10 @@ class numeric_grad(object): ...@@ -721,9 +721,10 @@ class numeric_grad(object):
return (max_arg, pos[max_arg], abs_errs[max_arg], rel_errs[max_arg]) return (max_arg, pos[max_arg], abs_errs[max_arg], rel_errs[max_arg])
def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, out_type=None, abs_tol=None, def verify_grad(fun, pt, n_tests=2, rng=None, eps=None,
out_type=None, abs_tol=None,
rel_tol=None, mode=None, cast_to_output_type=False): rel_tol=None, mode=None, cast_to_output_type=False):
""" Test a gradient by Finite Difference Method. Raise error on failure. """Test a gradient by Finite Difference Method. Raise error on failure.
Example: Example:
>>> verify_grad(theano.tensor.tanh, >>> verify_grad(theano.tensor.tanh,
...@@ -745,6 +746,10 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, out_type=None, abs_tol=N ...@@ -745,6 +746,10 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, out_type=None, abs_tol=N
of sum(u * fun) at pt of sum(u * fun) at pt
:param eps: stepsize used in the Finite Difference Method (Default :param eps: stepsize used in the Finite Difference Method (Default
None is type-dependent) None is type-dependent)
Raising the value of eps can raise or lower the absolute and
relative error of the verification depending on the
Op. Raising eps does not lower the verification quality. It
is better to raise eps than to raise abs_tol or rel_tol.
:param out_type: dtype of output, if complex (i.e. 'complex32' or :param out_type: dtype of output, if complex (i.e. 'complex32' or
'complex64') 'complex64')
:param abs_tol: absolute tolerance used as threshold for gradient :param abs_tol: absolute tolerance used as threshold for gradient
...@@ -757,9 +762,10 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, out_type=None, abs_tol=N ...@@ -757,9 +762,10 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, out_type=None, abs_tol=N
in debug mode, which can be very slow if it has to verify a lot of in debug mode, which can be very slow if it has to verify a lot of
intermediate computations. intermediate computations.
:note: This op does not support multiple outputs. In tests/test_scan.py :note: This function does not support multiple outputs. In
there is an experimental verify_grad that covers that case as well tests/test_scan.py there is an experimental verify_grad that
by using random projections. covers that case as well by using random projections.
""" """
from theano import compile, shared from theano import compile, shared
import theano.tensor import theano.tensor
......
...@@ -187,8 +187,18 @@ if __name__ == "__main__": ...@@ -187,8 +187,18 @@ if __name__ == "__main__":
(cuda version 3.2RC and up have a faster gemm on the Fermi/GTX[45]??) (cuda version 3.2RC and up have a faster gemm on the Fermi/GTX[45]??)
gpu/cuda version gpu/cuda version
GTX580/3.2 0.20s GTX680/4.2 0.154s
GTX480/3.2 0.24s GTX580/4.2 0.164s
GTX480/4.2 0.192s
GTX470/4.2 0.238s
GTX285/4.2 0.452s #cuda 3.0 seems faster? driver version?
GTX580/3.2 0.203s
GTX680/3.2 0.218s
GTX480/3.2 0.237s
GTX470/3.2 0.297s
GTX285/3.2 0.452s #cuda 3.0 seems faster? driver version?
GTX480/3.0 0.27s GTX480/3.0 0.27s
M2070/4.1 0.27s M2070/4.1 0.27s
GTX470/3.2 0.29s GTX470/3.2 0.29s
...@@ -197,6 +207,7 @@ if __name__ == "__main__": ...@@ -197,6 +207,7 @@ if __name__ == "__main__":
GTX285/3.0 0.40s GTX285/3.0 0.40s
C1060/3.2 0.46s C1060/3.2 0.46s
GTX550Ti/4.0 0.57s GTX550Ti/4.0 0.57s
520/3.2 3.06s
520M/3.2 3.19s with bumblebee on Ubuntu 12.04 520M/3.2 3.19s with bumblebee on Ubuntu 12.04
GT220/3.2RC 3.80s GT220/3.2RC 3.80s
GT210/4.0 6.35s GT210/4.0 6.35s
......
...@@ -163,6 +163,13 @@ if compile_cuda_ndarray: ...@@ -163,6 +163,13 @@ if compile_cuda_ndarray:
set_cuda_disabled() set_cuda_disabled()
finally: finally:
release_lock() release_lock()
elif not nvcc_compiler.is_nvcc_available():
# This can happen if cuda_ndarray.so was already compiled
# and then nvcc was removed. In that case we need to disable the CUDA
# back-end, as we won't be able to compile any new op, and we can't
# use only the already compiled GPU ops and skip the others.
set_cuda_disabled()
del compile_cuda_ndarray del compile_cuda_ndarray
if cuda_available: if cuda_available:
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论