提交 d2b2e4eb authored 作者: Frederic's avatar Frederic

Make the new profiler raise an error when the GPU is in use and CUDA_LAUNCH_BLOCKING isn't set to 1.

In that case, the profile information is useless, because GPU ops execute asynchronously by default.
上级 8ec7b0a0
...@@ -16,6 +16,7 @@ __contact__ = "theano-dev <theano-dev@googlegroups.com>" ...@@ -16,6 +16,7 @@ __contact__ = "theano-dev <theano-dev@googlegroups.com>"
__docformat__ = "restructuredtext en" __docformat__ = "restructuredtext en"
import atexit import atexit
import copy import copy
import os
import sys import sys
import time import time
...@@ -161,6 +162,18 @@ class ProfileStats(object): ...@@ -161,6 +162,18 @@ class ProfileStats(object):
**kwargs - misc initializers. These should (but need not) match the **kwargs - misc initializers. These should (but need not) match the
names of the class vars declared in this class. names of the class vars declared in this class.
""" """
if (hasattr(theano, 'sandbox') and
hasattr(theano.sandbox, 'cuda') and
theano.sandbox.cuda.cuda_enabled):
if os.environ.get('CUDA_LAUNCH_BLOCKING', '0') != '1':
raise Exception(
"You are running Theano profiler with CUDA enabled."
" Theano GPU ops execution are asynchron by default."
" So by default, the profile is useless."
" You must use set the environment variable"
" CUDA_LAUNCH_BLOCKING to 1 to tell the CUDA drvier to"
" synchonize the execution to get meaning full profile.")
self.apply_callcount = {} self.apply_callcount = {}
self.output_size = {} self.output_size = {}
self.apply_time = {} self.apply_time = {}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论