提交 69338f33 authored 作者: abergeron's avatar abergeron

Merge pull request #4454 from slefrancois/gpu_out_sandbox

Move new GPU backend out of sandbox
.. _libdoc_gpuarray_dnn:
===========================================
:mod:`theano.sandbox.gpuarray.dnn` -- cuDNN
:mod:`gpuarray.dnn` -- cuDNN
===========================================
.. moduleauthor:: LISA
......@@ -135,27 +135,27 @@ To get an error if Theano can not use cuDNN, use this Theano flag:
Functions
=========
.. automodule:: theano.sandbox.gpuarray.dnn
.. automodule:: theano.gpuarray.dnn
:noindex:
:members: dnn_conv, dnn_pool
Convolution Ops
===============
.. automodule:: theano.sandbox.gpuarray.dnn
.. automodule:: theano.gpuarray.dnn
:noindex:
:members: GpuDnnConvDesc, GpuDnnConv, GpuDnnConvGradW, GpuDnnConvGradI
Pooling Ops
===========
.. automodule:: theano.sandbox.gpuarray.dnn
.. automodule:: theano.gpuarray.dnn
:noindex:
:members: GpuDnnPoolDesc, GpuDnnPool, GpuDnnPoolGrad
Softmax Ops
===========
.. automodule:: theano.sandbox.gpuarray.dnn
.. automodule:: theano.gpuarray.dnn
:noindex:
:members: GpuDnnSoftmax, GpuDnnSoftmaxGrad
......@@ -7,11 +7,11 @@ Utility functions
Optimisation
------------
.. automodule:: theano.sandbox.gpuarray.opt_util
.. automodule:: theano.gpuarray.opt_util
:members:
Kernel generation
-----------------
.. automodule:: theano.sandbox.gpuarray.kernel_codegen
.. automodule:: theano.gpuarray.kernel_codegen
:members:
......@@ -2,10 +2,10 @@
.. _libdoc_gpuarray:
=======================================================
:mod:`theano.sandbox.gpuarray` -- The (new) GPU backend
:mod:`gpuarray` -- The (new) GPU backend
=======================================================
.. module:: theano.sandbox.gpuarray
.. module:: theano.gpuarray
:platform: Unix, Windows
:synopsis: Code for GPU programming (new)
.. moduleauthor:: MILA
......
......@@ -13,35 +13,35 @@ is just useful to let people know what is implemented on the gpu.
Basic Op
========
.. automodule:: theano.sandbox.gpuarray.basic_ops
.. automodule:: theano.gpuarray.basic_ops
:members:
Blas Op
=======
.. automodule:: theano.sandbox.gpuarray.blas
.. automodule:: theano.gpuarray.blas
:members:
.. automodule:: theano.sandbox.gpuarray.nerv
.. automodule:: theano.gpuarray.nerv
:members:
Elemwise Op
===========
.. automodule:: theano.sandbox.gpuarray.elemwise
.. automodule:: theano.gpuarray.elemwise
:members:
Subtensor Op
============
.. automodule:: theano.sandbox.gpuarray.subtensor
.. automodule:: theano.gpuarray.subtensor
:members:
Nnet Op
=======
.. automodule:: theano.sandbox.gpuarray.nnet
.. automodule:: theano.gpuarray.nnet
:members:
.. automodule:: theano.sandbox.gpuarray.neighbours
.. automodule:: theano.gpuarray.neighbours
:members:
.. _libdoc_gpuarray_type:
===================================================
:mod:`theano.sandbox.gpuarray.type` -- Type classes
:mod:`gpuarray.type` -- Type classes
===================================================
.. automodule:: theano.sandbox.gpuarray.type
.. automodule:: theano.gpuarray.type
:members:
......@@ -17,6 +17,7 @@ Types and Ops that you can use to build and compile expression graphs.
printing
d3viz/index
compile/index
gpuarray/index
sparse/index
sparse/sandbox
scalar/index
......
......@@ -14,7 +14,6 @@
:maxdepth: 1
cuda/index
gpuarray/index
linalg
neighbours
rng_mrg
......@@ -393,7 +393,7 @@ into a file and run it.
.. testcode::
from theano import function, config, shared, tensor, sandbox
from theano import function, config, shared, tensor
import numpy
import time
......@@ -461,7 +461,7 @@ the GPU object directly. The following code is modified to do just that.
.. testcode::
from theano import function, config, shared, tensor, sandbox
from theano import function, config, shared, tensor, gpuarray
import numpy
import time
......@@ -470,7 +470,7 @@ the GPU object directly. The following code is modified to do just that.
rng = numpy.random.RandomState(22)
x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
f = function([], sandbox.gpuarray.basic_ops.gpu_from_host(tensor.exp(x)))
f = function([], gpuarray.basic_ops.GpuFromHost(None)(tensor.exp(x)))
print(f.maker.fgraph.toposort())
t0 = time.time()
for i in range(iters):
......@@ -485,9 +485,10 @@ the GPU object directly. The following code is modifed to do just that.
else:
print('Used the gpu')
Here the :func:`theano.sandbox.gpuarray.basic.gpu_from_host` call
means "copy input to the GPU". However during the optimization phase,
since the result will already be on the gpu, it will be removed. It is
Here the :func:`theano.gpuarray.basic_ops.GpuFromHost(None)` call
means "copy input to the GPU", with ``None`` the default GPU context when not
explicitly given. However during the optimization phase,
since the result will already be on the gpu, it will be removed. It is
used here to tell theano that we want the result on the GPU.
The output is
......
......@@ -116,7 +116,7 @@ if (config.device.startswith('cuda') or
config.init_gpu_device.startswith('cuda') or
config.init_gpu_device.startswith('opencl') or
config.contexts != ''):
import theano.sandbox.gpuarray
import theano.gpuarray
# Use config.numpy to call numpy.seterr
import numpy
......
from __future__ import absolute_import, print_function, division
import sys
import logging
import sys
import warnings
import theano
from theano import config
from theano.compile import optdb
from theano.tensor.basic import register_transfer
_logger_name = 'theano.gpuarray'
_logger = logging.getLogger(_logger_name)
error = _logger.error
info = _logger.info
pygpu_activated = False
try:
import pygpu
import pygpu.gpuarray
except ImportError:
pygpu = None
# This is for documentation not to depend on the availability of pygpu
from .type import (GpuArrayType, GpuArrayVariable, GpuArrayConstant,
GpuArraySharedVariable, gpuarray_shared_constructor,
reg_context, get_context, ContextNotDefined)
from .basic_ops import as_gpuarray_variable
from . import dnn, opt, nerv, extra_ops
def transfer(x, target):
    """Transfer hook registered with theano's transfer machinery.

    Returns a gpuarray variable for `x` on context `target` when that
    name is a registered gpuarray context; returns None otherwise to
    signal that this backend cannot handle the requested target.
    """
    try:
        get_context(target)
    except ContextNotDefined:
        # `target` is not one of our contexts; decline the transfer.
        return None
    return as_gpuarray_variable(x, target)
register_transfer(transfer)
def init_dev(dev, name=None):
    """Initialize GPU device `dev` (e.g. 'cuda0', 'opencl0:0') and register
    its context under `name` (None registers the default context).

    Contexts are cached per device string in ``init_dev.devmap``, so a
    second call with the same `dev` reuses the existing context instead
    of creating a new one on the card.
    """
    v = pygpu.gpuarray.api_version()
    # -9998 is the exact (development) major API version this Theano
    # build expects from libgpuarray; anything else means the two are
    # out of sync.
    if v[0] != -9998:
        raise RuntimeError("Wrong major API version for gpuarray:", v[0],
                           "Make sure Theano and libgpuarray/pygpu "
                           "are in sync.")
    if v[1] < 0:
        raise RuntimeError("Wrong minor API version for gpuarray:", v[1],
                           "Please update libgpuarray/pygpu.")
    global pygpu_activated
    if dev not in init_dev.devmap:
        ctx = pygpu.init(dev)
        init_dev.devmap[dev] = ctx
        if config.gpuarray.preallocate != 0:
            if config.gpuarray.preallocate < 1:
                # Values in (0, 1) are a fraction of total GPU memory,
                # capped at 98% to leave some room for the driver.
                gmem = min(config.gpuarray.preallocate, 0.98) * ctx.total_gmem
            else:
                # Values >= 1 are a size in megabytes.
                gmem = config.gpuarray.preallocate * (1024*1024)
            # This will allocate and immediately free an object of size gmem
            # which will reserve that amount of memory on the GPU.
            pygpu.empty((gmem,), dtype='int8', context=ctx)
    context = init_dev.devmap[dev]
    # This will map the context name to the real context object.
    reg_context(name, context)
    pygpu_activated = True
    if config.print_active_device:
        warn = None
        cudnn_version = ""
        if dev.startswith('cuda'):
            # Default message, overwritten below if dnn.version() succeeds.
            cudnn_version = " (cuDNN not available)"
            try:
                cudnn_version = dnn.version()
                # 5100 should not print warning with cudnn 5 final.
                if cudnn_version > 5100:
                    warn = ("Your cuDNN version is more recent than Theano."
                            " If you see problems, try updating Theano or"
                            " downgrading cuDNN to version 5.")
                cudnn_version = " (cuDNN version %s)" % cudnn_version
            except Exception:
                # dnn.version() failed: report the reason recorded by the
                # cuDNN availability check instead.
                cudnn_version = dnn.dnn_present.msg
        print("Mapped name %s to device %s: %s%s" % (
            name, dev, context.devname, cudnn_version),
            file=sys.stderr)
        if warn:
            warnings.warn(warn)
# This maps things like 'cuda0' to the context object on that device.
init_dev.devmap = {}

if pygpu:
    try:
        # device=cuda*/opencl*: full GPU mode.  Initialize the device,
        # make float32 shared variables live on the GPU by default and
        # enable the GPU optimization passes.
        if (config.device.startswith('cuda') or
                config.device.startswith('opencl')):
            init_dev(config.device)
            import theano.compile
            theano.compile.shared_constructor(gpuarray_shared_constructor)
            optdb.add_tags('gpuarray_opt', 'fast_run', 'fast_compile')
            optdb.add_tags('gpua_scanOp_make_inplace', 'fast_run')
        elif (config.init_gpu_device.startswith('cuda') or
              config.init_gpu_device.startswith('opencl')):
            # init_gpu_device initializes the GPU without moving
            # computations there by default; it requires device=cpu.
            if config.device != 'cpu':
                raise ValueError('you must set device=cpu to use init_gpu_device.')
            if config.contexts != '':
                print("Using contexts will make init_gpu_device act like device and move all computations by default, which might not be what you want.")
            init_dev(config.init_gpu_device)
        if config.contexts != '':
            # config.contexts has the form "name->dev;name->dev;...":
            # register each named context on its device.
            for n, d in (c.split('->') for c in config.contexts.split(';')):
                init_dev(d.strip(), n.strip())
            import theano.compile
            theano.compile.shared_constructor(gpuarray_shared_constructor)
            optdb.add_tags('gpuarray_opt', 'fast_run', 'fast_compile')
            optdb.add_tags('gpua_scanOp_make_inplace', 'fast_run')

        # Re-export the common Ops at package level for convenience.
        # NOTE(review): GpuFromHost is imported twice (also below) —
        # harmless but redundant.
        from .basic_ops import (GpuAlloc, GpuAllocEmpty, GpuContiguous, GpuEye,
                                GpuFromHost, GpuJoin, GpuReshape, GpuSplit,
                                HostFromGpu)
        from .basic_ops import host_from_gpu, GpuFromHost
        from .elemwise import GpuElemwise
        from .subtensor import (GpuSubtensor, GpuIncSubtensor,
                                GpuAdvancedIncSubtensor1)
    except Exception:
        # Any failure during device init disables GPU support instead of
        # crashing the theano import.
        error("Could not initialize pygpu, support disabled", exc_info=True)
else:
    # pygpu could not be imported: only complain if the user's config
    # actually asked for a GPU.
    if (config.init_gpu_device.startswith('cuda') or
            config.init_gpu_device.startswith('opencl') or
            config.device.startswith('opencl') or
            config.device.startswith('cuda') or
            config.contexts != ''):
        error("pygpu was configured but could not be imported", exc_info=True)
......@@ -12,7 +12,7 @@ from theano.gradient import grad_undefined
from .type import gpu_context_type
from .basic_ops import as_gpuarray_variable, infer_context_name
_logger = logging.getLogger('theano.sandbox.gpuarray.blocksparse')
_logger = logging.getLogger('theano.gpuarray.blocksparse')
class GpuSparseBlockGemv(COp):
......
......@@ -12,7 +12,7 @@ import theano.sandbox.multinomial
from theano import Apply, config
from theano.gof import Op
from theano.tensor import NotScalarConstantError, get_scalar_constant_value
from theano.sandbox import gpuarray
from theano import gpuarray
from .basic_ops import as_gpuarray_variable, infer_context_name
from .opt import register_opt, op_lifter
from .type import GpuArrayType
......
......@@ -46,7 +46,7 @@ from .subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAdvancedIncSubtensor1_dev20)
from .opt_util import alpha_merge, output_merge
_logger = logging.getLogger("theano.sandbox.gpuarray.opt")
_logger = logging.getLogger("theano.gpuarray.opt")
gpu_optimizer = EquilibriumDB()
gpu_cut_copies = EquilibriumDB()
......
ctheano.sandbox.gpuarray.type
ctheano.gpuarray.type
GpuArray_unpickler
p0
(cnumpy.core.multiarray
......
from __future__ import absolute_import, print_function, division
from nose.plugins.skip import SkipTest
import theano.sandbox.gpuarray
import theano.gpuarray
if theano.sandbox.gpuarray.pygpu is None:
if theano.gpuarray.pygpu is None:
raise SkipTest("pygpu not installed")
if (not theano.sandbox.gpuarray.pygpu_activated and
if (not theano.gpuarray.pygpu_activated and
not theano.config.init_gpu_device.startswith('gpu')):
theano.sandbox.gpuarray.init_dev('cuda')
theano.gpuarray.init_dev('cuda')
if not theano.sandbox.gpuarray.pygpu_activated:
if not theano.gpuarray.pygpu_activated:
raise SkipTest("pygpu disabled")
test_ctx_name = None
......
......@@ -302,7 +302,7 @@ class G_reshape(test_basic.T_reshape):
mode=mode_with_gpu,
ignore_topo=(HostFromGpu, GpuFromHost,
theano.compile.DeepCopyOp,
theano.sandbox.gpuarray.elemwise.GpuElemwise,
theano.gpuarray.elemwise.GpuElemwise,
theano.tensor.opt.Shape_i,
theano.tensor.opt.MakeVector))
assert self.op == GpuReshape
......@@ -405,7 +405,7 @@ def test_hostfromgpu_shape_i():
'local_dot22_to_dot22scalar',
'specialize')
a = T.fmatrix('a')
ca = theano.sandbox.gpuarray.type.GpuArrayType('float32', (False, False))()
ca = theano.gpuarray.type.GpuArrayType('float32', (False, False))()
av = numpy.asarray(numpy.random.rand(5, 4), dtype='float32')
cv = gpuarray.asarray(numpy.random.rand(5, 4),
dtype='float32',
......
......@@ -7,7 +7,7 @@ from theano.tests.breakpoint import PdbBreakpoint
from theano.tests import unittest_tools as utt, test_ifelse
from theano.tensor.tests import test_basic
import theano.sandbox.gpuarray
import theano.gpuarray
from .. import basic_ops
from ..type import GpuArrayType, gpuarray_shared_constructor, get_context
from ..basic_ops import (
......
......@@ -14,24 +14,15 @@ from nose.plugins.skip import SkipTest
from nose.tools import assert_raises
import numpy
import theano.sandbox.gpuarray
from theano.compat import PY3
from theano import config
from theano.misc.pkl_utils import CompatUnpickler
if not theano.sandbox.gpuarray.pygpu_activated:
try:
import pygpu
except ImportError:
pygpu = None
import theano.sandbox.cuda as cuda_ndarray
if pygpu and cuda_ndarray.cuda_available:
cuda_ndarray.use('gpu', default_to_move_computation_to_gpu=False,
move_shared_float32_to_gpu=False,
enable_cuda=False)
theano.sandbox.gpuarray.init_dev('cuda')
from .. import pygpu_activated # noqa
try:
from . import config # noqa
have_pygpu = True
except SkipTest:
have_pygpu = False
def test_unpickle_gpuarray_as_numpy_ndarray_flag1():
......@@ -40,8 +31,8 @@ def test_unpickle_gpuarray_as_numpy_ndarray_flag1():
test_type.py test it when pygpu is there.
"""
if pygpu_activated:
raise SkipTest("pygpu disabled")
if have_pygpu:
raise SkipTest("pygpu active")
oldflag = config.experimental.unpickle_gpu_on_cpu
config.experimental.unpickle_gpu_on_cpu = False
......
......@@ -233,7 +233,7 @@ class GpuArrayType(Type):
return data
def filter_variable(self, other, allow_convert=True):
from theano.sandbox.gpuarray import GpuFromHost
from theano.gpuarray import GpuFromHost
if hasattr(other, '_as_GpuArrayVariable'):
other = other._as_GpuArrayVariable(self.context_name)
......
......@@ -12,8 +12,8 @@ import time
import numpy
import theano
from theano.sandbox.gpuarray import init_dev
from theano.sandbox.gpuarray.blas import gpu_dot22
from theano.gpuarray import init_dev
from theano.gpuarray.blas import gpu_dot22
def main(dev1, dev2):
......
......@@ -19,7 +19,7 @@ except ImportError:
return False
from theano.sandbox import cuda
from theano.sandbox import gpuarray
from theano import gpuarray
if cuda.cuda_available:
from theano.sandbox.cuda.type import CudaNdarrayType
......
from __future__ import absolute_import, print_function, division
import sys
import logging
import sys
import warnings
import theano
from theano import config
from theano.compile import optdb
from theano.tensor.basic import register_transfer
_logger_name = 'theano.sandbox.gpuarray'
_logger = logging.getLogger(_logger_name)
error = _logger.error
info = _logger.info
pygpu_activated = False
try:
import pygpu
import pygpu.gpuarray
except ImportError:
pygpu = None
# This is for documentation not to depend on the availability of pygpu
from .type import (GpuArrayType, GpuArrayVariable, GpuArrayConstant,
GpuArraySharedVariable, gpuarray_shared_constructor,
reg_context, get_context, ContextNotDefined)
from .basic_ops import as_gpuarray_variable
from . import dnn, opt, nerv, extra_ops
"""Placeholder for new gpuarray backend in sandbox. Supports old pickles
which referred to theano.sandbox.gpuarray."""


def transfer(x, target):
    """Transfer hook: return `x` as a gpuarray variable on context
    `target`, or None when `target` is not a registered context name
    (meaning this backend declines the transfer).
    """
    try:
        # Raises ContextNotDefined for unknown context names.
        get_context(target)
        return as_gpuarray_variable(x, target)
    except ContextNotDefined:
        pass
register_transfer(transfer)
def init_dev(dev, name=None):
    """Initialize GPU device `dev` (e.g. 'cuda0', 'opencl0:0') and register
    its context under `name` (None registers the default context).

    Contexts are cached per device string in ``init_dev.devmap``, so a
    second call with the same `dev` reuses the existing context instead
    of creating a new one on the card.
    """
    v = pygpu.gpuarray.api_version()
    # -9998 is the exact (development) major API version this Theano
    # build expects from libgpuarray; anything else means the two are
    # out of sync.
    if v[0] != -9998:
        raise RuntimeError("Wrong major API version for gpuarray:", v[0],
                           "Make sure Theano and libgpuarray/pygpu "
                           "are in sync.")
    if v[1] < 0:
        raise RuntimeError("Wrong minor API version for gpuarray:", v[1],
                           "Please update libgpuarray/pygpu.")
    global pygpu_activated
    if dev not in init_dev.devmap:
        ctx = pygpu.init(dev)
        init_dev.devmap[dev] = ctx
        if config.gpuarray.preallocate != 0:
            if config.gpuarray.preallocate < 1:
                # Values in (0, 1) are a fraction of total GPU memory,
                # capped at 98% to leave some room for the driver.
                gmem = min(config.gpuarray.preallocate, 0.98) * ctx.total_gmem
            else:
                # Values >= 1 are a size in megabytes.
                gmem = config.gpuarray.preallocate * (1024*1024)
            # This will allocate and immediately free an object of size gmem
            # which will reserve that amount of memory on the GPU.
            pygpu.empty((gmem,), dtype='int8', context=ctx)
    context = init_dev.devmap[dev]
    # This will map the context name to the real context object.
    reg_context(name, context)
    pygpu_activated = True
    if config.print_active_device:
        warn = None
        cudnn_version = ""
        if dev.startswith('cuda'):
            # Default message, overwritten below if dnn.version() succeeds.
            cudnn_version = " (cuDNN not available)"
            try:
                cudnn_version = dnn.version()
                # 5100 should not print warning with cudnn 5 final.
                if cudnn_version > 5100:
                    warn = ("Your cuDNN version is more recent than Theano."
                            " If you see problems, try updating Theano or"
                            " downgrading cuDNN to version 5.")
                cudnn_version = " (cuDNN version %s)" % cudnn_version
            except Exception:
                # dnn.version() failed: report the reason recorded by the
                # cuDNN availability check instead.
                cudnn_version = dnn.dnn_present.msg
        print("Mapped name %s to device %s: %s%s" % (
            name, dev, context.devname, cudnn_version),
            file=sys.stderr)
        if warn:
            warnings.warn(warn)
# This maps things like 'cuda0' to the context object on that device.
init_dev.devmap = {}

if pygpu:
    try:
        # device=cuda*/opencl*: full GPU mode.  Initialize the device,
        # make float32 shared variables live on the GPU by default and
        # enable the GPU optimization passes.
        if (config.device.startswith('cuda') or
                config.device.startswith('opencl')):
            init_dev(config.device)
            import theano.compile
            theano.compile.shared_constructor(gpuarray_shared_constructor)
            optdb.add_tags('gpuarray_opt', 'fast_run', 'fast_compile')
            optdb.add_tags('gpua_scanOp_make_inplace', 'fast_run')
        elif (config.init_gpu_device.startswith('cuda') or
              config.init_gpu_device.startswith('opencl')):
            # init_gpu_device initializes the GPU without moving
            # computations there by default; it requires device=cpu.
            if config.device != 'cpu':
                raise ValueError('you must set device=cpu to use init_gpu_device.')
            if config.contexts != '':
                print("Using contexts will make init_gpu_device act like device and move all computations by default, which might not be what you want.")
            init_dev(config.init_gpu_device)
        if config.contexts != '':
            # config.contexts has the form "name->dev;name->dev;...":
            # register each named context on its device.
            for n, d in (c.split('->') for c in config.contexts.split(';')):
                init_dev(d.strip(), n.strip())
            import theano.compile
            theano.compile.shared_constructor(gpuarray_shared_constructor)
            optdb.add_tags('gpuarray_opt', 'fast_run', 'fast_compile')
            optdb.add_tags('gpua_scanOp_make_inplace', 'fast_run')

        # Re-export the common Ops at package level for convenience.
        # NOTE(review): GpuFromHost is imported twice (also below) —
        # harmless but redundant.
        from .basic_ops import (GpuAlloc, GpuAllocEmpty, GpuContiguous, GpuEye,
                                GpuFromHost, GpuJoin, GpuReshape, GpuSplit,
                                HostFromGpu)
        from .basic_ops import host_from_gpu, GpuFromHost
        from .elemwise import GpuElemwise
        from .subtensor import (GpuSubtensor, GpuIncSubtensor,
                                GpuAdvancedIncSubtensor1)
        import warnings
        # Make everything from the new location visible under the old
        # sandbox name, so old code and pickles keep working.
        from theano.gpuarray import *
    except Exception:
        # Any failure during device init disables GPU support instead of
        # crashing the theano import.
        error("Could not initialize pygpu, support disabled", exc_info=True)
else:
    # pygpu could not be imported: only complain if the user's config
    # actually asked for a GPU.
    if (config.init_gpu_device.startswith('cuda') or
            config.init_gpu_device.startswith('opencl') or
            config.device.startswith('opencl') or
            config.device.startswith('cuda') or
            config.contexts != ''):
        error("pygpu was configured but could not be imported", exc_info=True)

# Always tell users this module has moved, whether or not pygpu loaded.
message = "theano.sandbox.gpuarray has been moved to theano.gpuarray." + \
          " Please update your code and pickles."
warnings.warn(message)
from __future__ import absolute_import, print_function, division
import os
import numpy
import theano
from theano import config
# This is a big hack to avoid creating a second context on the card.
from theano.sandbox.cuda.nvcc_compiler import (NVCC_compiler as NVCC_base,
hash_from_file)
class NVCC_compiler(NVCC_base):
    """NVCC compiler for the gpuarray backend.

    Subclasses the old CUDA backend's compiler but recomputes the
    compile flags so that no second CUDA context is created on the card.
    """

    @staticmethod
    def compile_args():
        """
        Re-implementation of compile_args that does not create an
        additional context on the GPU.

        Returns the list of nvcc command-line flags built from the
        Theano configuration (config.nvcc.*) plus macro and
        architecture flags.
        """
        # Start from the user-provided nvcc flags, dropping empty tokens.
        flags = [flag for flag in config.nvcc.flags.split(' ') if flag]
        if config.nvcc.fastmath:
            flags.append('-use_fast_math')
        # Embed a hash of cuda_ndarray.cuh so cached compilations are
        # invalidated when that header changes.
        cuda_ndarray_cuh_hash = hash_from_file(
            os.path.join(os.path.split(theano.sandbox.cuda.__file__)[0],
                         'cuda_ndarray.cuh'))
        flags.append('-DCUDA_NDARRAY_CUH=' + cuda_ndarray_cuh_hash)
        # numpy 1.7 deprecated the following macros but they didn't
        # exist in the past
        numpy_ver = [int(n) for n in numpy.__version__.split('.')[:2]]
        if bool(numpy_ver < [1, 7]):
            # Map the new NPY_ARRAY_* names onto the old pre-1.7 ones.
            flags.append("-DNPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY")
            flags.append("-DNPY_ARRAY_ALIGNED=NPY_ALIGNED")
            flags.append("-DNPY_ARRAY_WRITEABLE=NPY_WRITEABLE")
            flags.append("-DNPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL")
            flags.append("-DNPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS")
            flags.append("-DNPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS")
        # If the user didn't specify architecture flags add them
        if not any(['-arch=sm_' in f for f in flags]):
            # NOTE(review): init_dev.device is assumed to be set during
            # device initialization elsewhere — not visible in this file;
            # confirm before relying on it.
            dev = theano.sandbox.gpuarray.init_dev.device
            if dev is None:
                raise Exception("Trying to compile GPU code without a context")
            if dev.startswith("opencl"):
                raise Exception("Trying to call nvcc with an OpenCL context")
            assert dev.startswith('cuda')
            if dev == 'cuda':
                # Bare 'cuda' device: ask the old backend which card is in use.
                n = theano.sandbox.cuda.use.device_number
            else:
                # 'cudaN': the device number follows the 'cuda' prefix.
                n = int(dev[4:])
            p = theano.sandbox.cuda.device_properties(n)
            flags.append('-arch=sm_' + str(p['major']) + str(p['minor']))
        return flags
......@@ -24,11 +24,11 @@ from . import multinomial
import theano.sandbox.cuda
from theano.sandbox.cuda import GpuOp
from theano.sandbox.gpuarray.basic_ops import GpuKernelBase, Kernel
from theano.sandbox.gpuarray.type import GpuArrayType
from theano.sandbox.gpuarray.fp16_help import write_w
from theano.sandbox.gpuarray.opt import (register_opt as register_gpua,
host_from_gpu as host_from_gpua)
from theano.gpuarray.basic_ops import GpuKernelBase, Kernel
from theano.gpuarray.type import GpuArrayType
from theano.gpuarray.fp16_help import write_w
from theano.gpuarray.opt import (register_opt as register_gpua,
host_from_gpu as host_from_gpua)
if theano.sandbox.cuda.cuda_available:
from theano.sandbox.cuda import (CudaNdarrayType,
float32_shared_constructor)
......
......@@ -366,9 +366,9 @@ def test_consistency_GPUA_serial():
are the same as the reference (Java) implementation by L'Ecuyer et al.
"""
from theano.sandbox.gpuarray.tests.test_basic_ops import \
from theano.gpuarray.tests.test_basic_ops import \
mode_with_gpu as mode
from theano.sandbox.gpuarray.type import gpuarray_shared_constructor
from theano.gpuarray.type import gpuarray_shared_constructor
seed = 12345
n_samples = 5
......@@ -421,9 +421,9 @@ def test_consistency_GPUA_parallel():
L'Ecuyer et al.
"""
from theano.sandbox.gpuarray.tests.test_basic_ops import \
from theano.gpuarray.tests.test_basic_ops import \
mode_with_gpu as mode
from theano.sandbox.gpuarray.type import gpuarray_shared_constructor
from theano.gpuarray.type import gpuarray_shared_constructor
seed = 12345
n_samples = 5
......@@ -1107,9 +1107,9 @@ def test_overflow_gpu_old_backend():
def test_overflow_gpu_new_backend():
# run with THEANO_FLAGS=mode=FAST_RUN,init_gpu_device=cuda1,device=cpu
from theano.sandbox.gpuarray.tests.test_basic_ops import \
from theano.gpuarray.tests.test_basic_ops import \
mode_with_gpu as mode
from theano.sandbox.gpuarray.type import gpuarray_shared_constructor
from theano.gpuarray.type import gpuarray_shared_constructor
seed = 12345
n_substreams = 7
curr_rstate = numpy.array([seed] * 6, dtype='int32')
......
......@@ -982,7 +982,8 @@ def scan(fn,
# the file because that would force on the user some dependencies that we
# might do not want to. Currently we are working on removing the
# dependencies on sandbox code completeley.
from theano.sandbox import cuda, gpuarray
from theano.sandbox import cuda
from theano import gpuarray
if cuda.cuda_available or gpuarray.pygpu_activated:
# very often we end up in this situation when we want to
# replace w with w_copy, where w is a GPU variable
......
......@@ -272,7 +272,7 @@ class Scan(PureOp):
# If scan has the flag 'gpua' set to false (meaning that is shouldn't
# use the gpuarray gpu backend ), ensure that is has no input and no
# output with type GpuArrayType
from theano.sandbox.gpuarray import GpuArrayType
from theano.gpuarray import GpuArrayType
if not self.info.get("gpua", False):
for inp in self.inputs:
if isinstance(inp.type, GpuArrayType):
......
......@@ -1008,8 +1008,8 @@ class ScanInplaceOptimizer(Optimizer):
# gpuarray might be imported but not its GpuAlloc and
# GpuAllopEmpty ops.
try:
alloc_ops += (theano.sandbox.gpuarray.GpuAlloc,
theano.sandbox.gpuarray.GpuAllocEmpty)
alloc_ops += (theano.gpuarray.GpuAlloc,
theano.gpuarray.GpuAllocEmpty)
except:
pass
......
......@@ -151,7 +151,8 @@ def traverse(out, x, x_copy, d, visited=None):
if out in visited:
return d
visited.add(out)
from theano.sandbox import cuda, gpuarray
from theano.sandbox import cuda
from theano import gpuarray
if out == x:
if isinstance(x.type, cuda.CudaNdarrayType):
d[out] = cuda.gpu_from_host(x_copy)
......
......@@ -4939,7 +4939,7 @@ class T_Scan_Gpuarray(unittest.TestCase, ScanGpuTests):
"""
def __init__(self, *args, **kwargs):
from theano.sandbox import gpuarray
from theano import gpuarray
self.gpu_backend = gpuarray
# This is unfortunate, but required
......
......@@ -39,6 +39,8 @@ whitelist_flake8 = [
"compile/profiling.py",
"compile/sandbox/__init__.py",
"compile/tests/__init__.py",
"gpuarray/__init__.py",
"gpuarray/tests/__init__.py",
"typed_list/__init__.py",
"typed_list/tests/__init__.py",
"tensor/__init__.py",
......@@ -89,7 +91,7 @@ whitelist_flake8 = [
"sandbox/tests/__init__.py",
"sandbox/cuda/__init__.py",
"sandbox/cuda/tests/__init__.py",
"sandbox/gpuarray/tests/__init__.py",
"sandbox/gpuarray/__init__.py",
"sandbox/scan_module/scan_utils.py",
"sandbox/scan_module/scan.py",
"sandbox/scan_module/scan_op.py",
......@@ -100,7 +102,6 @@ whitelist_flake8 = [
"sandbox/linalg/__init__.py",
"sandbox/linalg/tests/__init__.py",
"sandbox/linalg/tests/test_linalg.py",
"sandbox/gpuarray/__init__.py",
"scan_module/scan_utils.py",
"scan_module/scan_views.py",
"scan_module/scan.py",
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论