提交 cf7d0688 作者: Arnaud Bergeron

Remove tentacles in compile.

上级 df95d9a9
...@@ -126,17 +126,6 @@ else: ...@@ -126,17 +126,6 @@ else:
raise ImportError("The nose module is not installed." raise ImportError("The nose module is not installed."
" It is needed for Theano tests.") " It is needed for Theano tests.")
if config.device.startswith('gpu') or config.init_gpu_device.startswith('gpu'):
import theano.sandbox.cuda
# We can't test the driver during import of theano.sandbox.cuda as
# this cause circular import dependency. So we also test it manually
# after the import
if theano.sandbox.cuda.cuda_available:
import theano.sandbox.cuda.tests.test_driver
if config.enable_initial_driver_test:
theano.sandbox.cuda.tests.test_driver.test_nvidia_driver1()
if (config.device.startswith('cuda') or if (config.device.startswith('cuda') or
config.device.startswith('opencl') or config.device.startswith('opencl') or
config.init_gpu_device.startswith('cuda') or config.init_gpu_device.startswith('cuda') or
......
...@@ -1198,10 +1198,11 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val, ...@@ -1198,10 +1198,11 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
# To avoid circular imports # To avoid circular imports
from theano.tensor import TensorType from theano.tensor import TensorType
from theano.sandbox.cuda import cuda_available, CudaNdarrayType from theano.gpuarray import GpuArrayType
if cuda_available: try:
from theano.sandbox.cuda import CudaNdarray import pygpu
from theano.sandbox.cuda import dimshuffle as cuda_dimshuffle except ImportError:
pass
# TODO: Sparse? Scalar does not really make sense. # TODO: Sparse? Scalar does not really make sense.
...@@ -1240,7 +1241,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val, ...@@ -1240,7 +1241,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
for r in considered_outputs: for r in considered_outputs:
# There is no risk to overwrite inputs, since r does not work # There is no risk to overwrite inputs, since r does not work
# inplace. # inplace.
if isinstance(r.type, (TensorType, CudaNdarrayType)): if isinstance(r.type, (TensorType, GpuArrayType)):
reuse_outputs[r][...] = np.asarray( reuse_outputs[r][...] = np.asarray(
def_val).astype(r.type.dtype) def_val).astype(r.type.dtype)
...@@ -1250,15 +1251,14 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val, ...@@ -1250,15 +1251,14 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
del reuse_outputs del reuse_outputs
# c_cont_output: use a c-continuous array # c_cont_output: use a c-continuous array
# (for TensorType and CudaNdarray, else None) # (for TensorType, else None)
if 'c_contiguous' in prealloc_modes or 'ALL' in prealloc_modes: if 'c_contiguous' in prealloc_modes or 'ALL' in prealloc_modes:
c_cont_outputs = {} c_cont_outputs = {}
for r in considered_outputs: for r in considered_outputs:
if isinstance(r.type, (TensorType, CudaNdarrayType)): if isinstance(r.type, (TensorType, GpuArrayType)):
# Build a C-contiguous buffer # Build a C-contiguous buffer
new_buf = r.type.value_zeros(r_vals[r].shape) new_buf = r.type.value_zeros(r_vals[r].shape)
# CudaNdarray don't have flags field assert new_buf.flags["C_CONTIGUOUS"]
# assert new_buf.flags["C_CONTIGUOUS"]
new_buf[...] = np.asarray(def_val).astype(r.type.dtype) new_buf[...] = np.asarray(def_val).astype(r.type.dtype)
c_cont_outputs[r] = new_buf c_cont_outputs[r] = new_buf
...@@ -1272,18 +1272,14 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val, ...@@ -1272,18 +1272,14 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
if 'f_contiguous' in prealloc_modes or 'ALL' in prealloc_modes: if 'f_contiguous' in prealloc_modes or 'ALL' in prealloc_modes:
f_cont_outputs = {} f_cont_outputs = {}
for r in considered_outputs: for r in considered_outputs:
if isinstance(r.type, (TensorType, CudaNdarrayType)): if isinstance(r.type, (TensorType, GpuArrayType)):
new_buf = np.zeros( new_buf = np.zeros(
shape=r_vals[r].shape, shape=r_vals[r].shape,
dtype=r_vals[r].dtype, dtype=r_vals[r].dtype,
order='F') order='F')
new_buf[...] = def_val new_buf[...] = def_val
if isinstance(r.type, CudaNdarrayType): if isinstance(r.type, GpuArrayType):
# When the CudaNdarray is built, the underlying memory new_buf = pygpu.array(new_buf)
# is c-contiguous, so we transpose it before and after.
new_buf = CudaNdarray(new_buf.T)
new_buf = cuda_dimshuffle(
new_buf, reversed(list(range(new_buf.ndim))))
f_cont_outputs[r] = new_buf f_cont_outputs[r] = new_buf
...@@ -1305,7 +1301,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val, ...@@ -1305,7 +1301,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
max_ndim = 0 max_ndim = 0
rev_out_broadcastable = [] rev_out_broadcastable = []
for r in considered_outputs: for r in considered_outputs:
if isinstance(r.type, (TensorType, CudaNdarrayType)): if isinstance(r.type, (TensorType, GpuArrayType)):
if max_ndim < r.ndim: if max_ndim < r.ndim:
rev_out_broadcastable += [True] * (r.ndim - max_ndim) rev_out_broadcastable += [True] * (r.ndim - max_ndim)
max_ndim = r.ndim max_ndim = r.ndim
...@@ -1320,7 +1316,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val, ...@@ -1320,7 +1316,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
# Initial allocation # Initial allocation
init_strided = {} init_strided = {}
for r in considered_outputs: for r in considered_outputs:
if isinstance(r.type, (TensorType, CudaNdarrayType)): if isinstance(r.type, (TensorType, GpuArrayType)):
# Create a buffer twice as large in every dimension, # Create a buffer twice as large in every dimension,
# except if broadcastable, or for dimensions above # except if broadcastable, or for dimensions above
# config.DebugMode.check_preallocated_output_ndim # config.DebugMode.check_preallocated_output_ndim
...@@ -1399,7 +1395,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val, ...@@ -1399,7 +1395,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
name = 'wrong_size%s' % str(tuple(shape_diff)) name = 'wrong_size%s' % str(tuple(shape_diff))
for r in considered_outputs: for r in considered_outputs:
if isinstance(r.type, (TensorType, CudaNdarrayType)): if isinstance(r.type, (TensorType, GpuArrayType)):
r_shape_diff = shape_diff[:r.ndim] r_shape_diff = shape_diff[:r.ndim]
out_shape = [max((s + sd), 0) out_shape = [max((s + sd), 0)
for s, sd in zip(r_vals[r].shape, for s, sd in zip(r_vals[r].shape,
...@@ -1741,7 +1737,6 @@ class _VariableEquivalenceTracker(object): ...@@ -1741,7 +1737,6 @@ class _VariableEquivalenceTracker(object):
# List of default version of make thunk. # List of default version of make thunk.
# This is needed to know if the user overrided it. # This is needed to know if the user overrided it.
# The GpuOp will be added here when theano.sandbox.cuda is imported.
default_make_thunk = [get_unbound_function(theano.gof.Op.make_thunk)] default_make_thunk = [get_unbound_function(theano.gof.Op.make_thunk)]
......
...@@ -8,7 +8,6 @@ import numpy as np ...@@ -8,7 +8,6 @@ import numpy as np
import theano import theano
from theano.configparser import config from theano.configparser import config
import theano.tensor as T import theano.tensor as T
import theano.sandbox.cuda as cuda
from theano.compile import Mode from theano.compile import Mode
from .mode import get_mode from .mode import get_mode
...@@ -107,16 +106,6 @@ def contains_nan(arr, node=None, var=None): ...@@ -107,16 +106,6 @@ def contains_nan(arr, node=None, var=None):
""" """
if not _is_numeric_value(arr, var): if not _is_numeric_value(arr, var):
return False return False
elif cuda.cuda_available and isinstance(arr, cuda.CudaNdarray):
if (node and hasattr(theano.sandbox, 'rng_mrg') and
isinstance(
node.op,
# It store ints in float container
theano.sandbox.rng_mrg.GPU_mrg_uniform)):
return False
else:
compile_gpu_func(True, False, False)
return np.isnan(f_gpumin(arr.reshape(arr.size)))
elif pygpu_available and isinstance(arr, GpuArray): elif pygpu_available and isinstance(arr, GpuArray):
return np.isnan(f_gpua_min(arr.reshape(arr.size))) return np.isnan(f_gpua_min(arr.reshape(arr.size)))
...@@ -150,70 +139,12 @@ def contains_inf(arr, node=None, var=None): ...@@ -150,70 +139,12 @@ def contains_inf(arr, node=None, var=None):
""" """
if not _is_numeric_value(arr, var): if not _is_numeric_value(arr, var):
return False return False
elif cuda.cuda_available and isinstance(arr, cuda.CudaNdarray):
if (node and hasattr(theano.sandbox, 'rng_mrg') and
isinstance(
node.op,
# It store ints in float container
theano.sandbox.rng_mrg.GPU_mrg_uniform)):
return False
else:
compile_gpu_func(False, True, False)
return (np.isinf(f_gpumin(arr.reshape(arr.size))) or
np.isinf(f_gpumax(arr.reshape(arr.size))))
elif pygpu_available and isinstance(arr, GpuArray): elif pygpu_available and isinstance(arr, GpuArray):
return (np.isinf(f_gpua_min(arr.reshape(arr.size))) or return (np.isinf(f_gpua_min(arr.reshape(arr.size))) or
np.isinf(f_gpua_max(arr.reshape(arr.size)))) np.isinf(f_gpua_max(arr.reshape(arr.size))))
return np.isinf(np.nanmax(arr)) or np.isinf(np.nanmin(arr)) return np.isinf(np.nanmax(arr)) or np.isinf(np.nanmin(arr))
f_gpumin = None
f_gpumax = None
f_gpuabsmax = None
def compile_gpu_func(nan_is_error, inf_is_error, big_is_error):
    """Compile, on demand, the GPU reductions used by the NaN/Inf/big checks.

    The compiled functions are stored in the module-level names
    ``f_gpumin``, ``f_gpumax`` and ``f_gpuabsmax``.  A slot that already
    holds a function is left untouched, so calling this repeatedly is cheap.

    Parameters
    ----------
    nan_is_error : bool
        When true, ``f_gpumin`` is required.
    inf_is_error : bool
        When true, both ``f_gpumin`` and ``f_gpumax`` are required.
    big_is_error : bool
        When true, ``f_gpuabsmax`` is required.

    Notes
    -----
    Nothing is done when ``cuda.cuda_available`` is false.  If one
    compilation raises ``RuntimeError``, its slot stays ``None`` and the
    remaining compilations of this call are skipped.
    """
    global f_gpumin, f_gpumax, f_gpuabsmax

    if not cuda.cuda_available:
        return

    guard_input = cuda.fvector('nan_guard')

    def _try_compile(reduction):
        # The symbolic expression is built inside the ``try`` so that a
        # RuntimeError raised at either stage is handled the same way.
        try:
            return theano.function(
                [guard_input], reduction(guard_input),
                mode='FAST_RUN'
            )
        except RuntimeError:
            # This can happen if cuda is available, but the
            # device is in exclusive mode and used by another
            # process.
            return None

    cuda_compile_failed = False

    if f_gpumin is None and (nan_is_error or inf_is_error):
        f_gpumin = _try_compile(T.min)
        cuda_compile_failed = f_gpumin is None

    if inf_is_error and f_gpumax is None and not cuda_compile_failed:
        f_gpumax = _try_compile(T.max)
        cuda_compile_failed = f_gpumax is None

    if big_is_error and f_gpuabsmax is None and not cuda_compile_failed:
        f_gpuabsmax = _try_compile(lambda vec: T.max(T.abs_(vec)))
def f_compute(op): def f_compute(op):
def result(inp): def result(inp):
...@@ -270,9 +201,6 @@ class NanGuardMode(Mode): ...@@ -270,9 +201,6 @@ class NanGuardMode(Mode):
assert nan_is_error or inf_is_error or big_is_error assert nan_is_error or inf_is_error or big_is_error
if cuda.cuda_enabled:
compile_gpu_func(nan_is_error, inf_is_error, big_is_error)
def do_check_on(value, nd, var=None): def do_check_on(value, nd, var=None):
""" """
Checks `value` for NaNs / Infs. If detected, raises an exception Checks `value` for NaNs / Infs. If detected, raises an exception
...@@ -304,9 +232,6 @@ class NanGuardMode(Mode): ...@@ -304,9 +232,6 @@ class NanGuardMode(Mode):
err = False err = False
if not _is_numeric_value(value, var): if not _is_numeric_value(value, var):
err = False err = False
elif cuda.cuda_available and isinstance(value, cuda.CudaNdarray):
compile_gpu_func(False, False, True)
err = (f_gpuabsmax(value.reshape(value.size)) > 1e10)
elif pygpu_available and isinstance(value, GpuArray): elif pygpu_available and isinstance(value, GpuArray):
err = (f_gpua_absmax(value.reshape(value.size)) > 1e10) err = (f_gpua_absmax(value.reshape(value.size)) > 1e10)
else: else:
......
...@@ -810,7 +810,7 @@ class SpecifyShape(gof.Op): ...@@ -810,7 +810,7 @@ class SpecifyShape(gof.Op):
We currently don't support specifying partial shape information. We currently don't support specifying partial shape information.
TODO : test this op with sparse and cuda ndarray. Do C code for them too. TODO : test this op with sparse. Do C code for them too.
""" """
......
...@@ -262,11 +262,8 @@ class ProfileStats(object): ...@@ -262,11 +262,8 @@ class ProfileStats(object):
def __init__(self, atexit_print=True, flag_time_thunks=None, def __init__(self, atexit_print=True, flag_time_thunks=None,
gpu_checks=True, **kwargs): gpu_checks=True, **kwargs):
if (gpu_checks and if (gpu_checks and
((hasattr(theano, 'sandbox') and (hasattr(theano, 'gpuarray') and
hasattr(theano.sandbox, 'cuda') and theano.gpuarray.pygpu_activated) and
theano.sandbox.cuda.cuda_enabled) or (
hasattr(theano, 'gpuarray') and
theano.gpuarray.pygpu_activated)) and
os.environ.get('CUDA_LAUNCH_BLOCKING', '0') != '1'): os.environ.get('CUDA_LAUNCH_BLOCKING', '0') != '1'):
msg = ( msg = (
"You are running the Theano profiler with CUDA enabled." "You are running the Theano profiler with CUDA enabled."
...@@ -285,9 +282,9 @@ class ProfileStats(object): ...@@ -285,9 +282,9 @@ class ProfileStats(object):
theano.gpuarray.pygpu_activated and theano.gpuarray.pygpu_activated and
not config.profiling.ignore_first_call): not config.profiling.ignore_first_call):
warnings.warn( warnings.warn(
"Theano flag profiling.ignore_first_call is False." "Theano flag profiling.ignore_first_call is False. "
" This cause bad profiling result in the new gpu" "This cause bad profiling result in the gpu "
" back-end, as sometimes we compile at the first call.") "back-end, as sometimes we compile at the first call.")
self.apply_callcount = {} self.apply_callcount = {}
self.output_size = {} self.output_size = {}
...@@ -508,8 +505,8 @@ class ProfileStats(object): ...@@ -508,8 +505,8 @@ class ProfileStats(object):
tot += t tot += t
ftot = tot * 100 / local_time ftot = tot * 100 / local_time
# Remove the useless start and end of the class name: # Remove the useless start and end of the class name:
# "<class 'theano.sandbox.cuda.blas.GpuDot22'>" -> # "<class 'theano.gpuarray.blas.GpuDot22'>" ->
# "theano.sandbox.cuda.blas.GpuDot22" # "theano.gpuarray.blas.GpuDot22"
class_name = str(a)[8:-2][:maxlen] class_name = str(a)[8:-2][:maxlen]
print(format_str % (f, ftot, t, t / nb_call, print(format_str % (f, ftot, t, t / nb_call,
impl, nb_call, impl, nb_call,
...@@ -820,7 +817,8 @@ class ProfileStats(object): ...@@ -820,7 +817,8 @@ class ProfileStats(object):
new allocation. new allocation.
""" """
from theano.sandbox.cuda import CudaNdarrayType from theano.gpuarray import GpuArrayType
# Initial Mem info values [CPU, GPU] # Initial Mem info values [CPU, GPU]
node_memory_size = [0, 0] node_memory_size = [0, 0]
running_memory_size = [0, 0] running_memory_size = [0, 0]
...@@ -870,7 +868,7 @@ class ProfileStats(object): ...@@ -870,7 +868,7 @@ class ProfileStats(object):
# allocated by the node # allocated by the node
idx2 = 0 idx2 = 0
for out in node.outputs: for out in node.outputs:
if isinstance(out.type, CudaNdarrayType): if isinstance(out.type, GpuArrayType):
cg = 1 cg = 1
else: else:
cg = 0 cg = 0
...@@ -912,7 +910,7 @@ class ProfileStats(object): ...@@ -912,7 +910,7 @@ class ProfileStats(object):
for ins in set(node.inputs): for ins in set(node.inputs):
assert not (ins in view_of and viewed_by[ins]) assert not (ins in view_of and viewed_by[ins])
# we trac the original var, so this shouldn't happen # we trac the original var, so this shouldn't happen
if isinstance(ins.type, CudaNdarrayType): if isinstance(ins.type, GpuArrayType):
cg = 1 cg = 1
else: else:
cg = 0 cg = 0
...@@ -1245,16 +1243,6 @@ class ProfileStats(object): ...@@ -1245,16 +1243,6 @@ class ProfileStats(object):
print("---", file=file) print("---", file=file)
if (hasattr(theano, 'sandbox') and
hasattr(theano.sandbox, 'cuda') and
hasattr(theano.sandbox.cuda, 'cuda_ndarray') and
hasattr(theano.sandbox.cuda.cuda_ndarray.cuda_ndarray,
'theano_allocated')):
cuda_ndarray = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray
_, gpu_max = cuda_ndarray.theano_allocated()
print(" Max Memory allocated on the GPU (for all functions): "
"%dKB" % int(round(gpu_max / 1024.)), file=file)
print("", file=file) print("", file=file)
if len(fct_memory) > 1: if len(fct_memory) > 1:
print(" This list is based on all functions in the profile", print(" This list is based on all functions in the profile",
...@@ -1457,7 +1445,6 @@ class ProfileStats(object): ...@@ -1457,7 +1445,6 @@ class ProfileStats(object):
printed_tip = True printed_tip = True
# tip 7 # tip 7
import theano.sandbox.cuda as cuda
from theano.tensor.nnet import LogSoftmax from theano.tensor.nnet import LogSoftmax
import theano.tensor.signal.pool as pool import theano.tensor.signal.pool as pool
import theano.gpuarray import theano.gpuarray
...@@ -1465,12 +1452,12 @@ class ProfileStats(object): ...@@ -1465,12 +1452,12 @@ class ProfileStats(object):
for a in self.apply_time: for a in self.apply_time:
node = a node = a
if (isinstance(node.op, pool.Pool)): if (isinstance(node.op, pool.Pool)):
if (not cuda.dnn.dnn_available() and not theano.gpuarray.dnn.dnn_present()): if not theano.gpuarray.dnn.dnn_present():
print("Install CuDNN to do pooling faster" print("Install CuDNN to do pooling faster"
"this allows the operation to run on GPU") "this allows the operation to run on GPU")
printed_tip = True printed_tip = True
if (isinstance(node.op, LogSoftmax)): if (isinstance(node.op, LogSoftmax)):
if (not cuda.dnn.dnn_available() and not theano.gpuarray.dnn.dnn_present()): if not theano.gpuarray.dnn.dnn_present():
print("Install CuDNN to do LogSoftmax faster" print("Install CuDNN to do LogSoftmax faster"
"this allows the operation to run on GPU") "this allows the operation to run on GPU")
printed_tip = True printed_tip = True
......
...@@ -713,7 +713,6 @@ class VecAsRowAndCol(gof.Op): ...@@ -713,7 +713,6 @@ class VecAsRowAndCol(gof.Op):
if (c[0] is None) or (c[0].shape != (lv, 1)): if (c[0] is None) or (c[0].shape != (lv, 1)):
c[0] = node.outputs[1].type.value_zeros((lv, 1)) c[0] = node.outputs[1].type.value_zeros((lv, 1))
# Python loop because CudaNdarrays do not support newaxis
for i in range(lv): for i in range(lv):
r[0][0, i] = v[i] r[0][0, i] = v[i]
c[0][i, 0] = v[i] c[0][i, 0] = v[i]
...@@ -794,24 +793,3 @@ class Test_preallocated_output(unittest.TestCase): ...@@ -794,24 +793,3 @@ class Test_preallocated_output(unittest.TestCase):
v_val = self.rng.randn(5).astype('float32') v_val = self.rng.randn(5).astype('float32')
f(v_val) f(v_val)
def test_output_broadcast_cuda(self):
    """Compile and run ``VecAsRowAndCol`` on a ``CudaNdarray`` vector.

    Skipped when the CUDA back-end is not available.
    """
    from theano.sandbox import cuda

    if not cuda.cuda_available:
        raise SkipTest("Optional package Cuda disabled")

    gpu_not_initialised = cuda.use.device_number is None
    if gpu_not_initialised:
        # We should normally set VecAsRowAndCol as a GPUOp, but we
        # don't want to do this here as this will disable other
        # tests in this file. So we manually init the GPU if
        # needed to remove the warning.
        cuda.use("gpu",
                 force=True,
                 default_to_move_computation_to_gpu=False,
                 move_shared_float32_to_gpu=False,
                 enable_cuda=False)

    vec = cuda.fvector('v')
    col, row = VecAsRowAndCol()(vec)
    compiled = theano.function([vec], [col, row])

    # The input value is created only after the function is compiled,
    # then fed straight to it.
    host_values = self.rng.randn(5).astype('float32')
    compiled(cuda.CudaNdarray(host_values))
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论