提交 8aa08ca2 authored 作者: lamblin's avatar lamblin

Merge pull request #450 from nouiz/gpusum

Test nvidia driver
...@@ -99,6 +99,11 @@ import gof ...@@ -99,6 +99,11 @@ import gof
if config.device.startswith('gpu') or config.init_gpu_device.startswith('gpu'): if config.device.startswith('gpu') or config.init_gpu_device.startswith('gpu'):
import theano.sandbox.cuda import theano.sandbox.cuda
# We can't test the driver during import of theano.sandbox.cuda as
# this cause circular import dependency. So we also test it manually
# after the import
import theano.sandbox.cuda.tests.test_driver
theano.sandbox.cuda.tests.test_driver.test_nvidia_driver1()
# Use config.numpy to call numpy.seterr # Use config.numpy to call numpy.seterr
import numpy import numpy
......
...@@ -7,9 +7,9 @@ from theano.sandbox.cuda.basic_ops import as_cuda_ndarray_variable, host_from_gp ...@@ -7,9 +7,9 @@ from theano.sandbox.cuda.basic_ops import as_cuda_ndarray_variable, host_from_gp
from theano.misc import strutil from theano.misc import strutil
from theano.tensor.nnet.Conv3D import Conv3D from theano.tensor.nnet.Conv3D import Conv3D
from theano.sandbox.cuda.opt import register_opt from theano.sandbox.cuda.opt import register_opt
from theano.sandbox.cuda import CudaNdarrayType from theano.sandbox.cuda import CudaNdarrayType, GpuOp
class GpuConv3D(theano.Op): class GpuConv3D(GpuOp):
""" GPU implementation of Conv3D """ """ GPU implementation of Conv3D """
def __eq__(self, other): def __eq__(self, other):
......
...@@ -8,11 +8,12 @@ from theano.misc import strutil ...@@ -8,11 +8,12 @@ from theano.misc import strutil
from theano.tensor.nnet.ConvGrad3D import ConvGrad3D from theano.tensor.nnet.ConvGrad3D import ConvGrad3D
from theano.sandbox.cuda.opt import register_opt from theano.sandbox.cuda.opt import register_opt
from theano.sandbox.cuda import CudaNdarrayType, HostFromGpu, host_from_gpu from theano.sandbox.cuda import (CudaNdarrayType, HostFromGpu,
host_from_gpu, GpuOp)
class GpuConvGrad3D(theano.Op): class GpuConvGrad3D(GpuOp):
""" GPU version of gradient of ConvGrad3D with respect to W """ """ GPU version of gradient of ConvGrad3D with respect to W """
def make_node(self, V, d, WShape, dCdH): def make_node(self, V, d, WShape, dCdH):
......
...@@ -9,10 +9,11 @@ from theano.gof import local_optimizer ...@@ -9,10 +9,11 @@ from theano.gof import local_optimizer
from theano.sandbox.cuda.basic_ops import as_cuda_ndarray_variable from theano.sandbox.cuda.basic_ops import as_cuda_ndarray_variable
from theano.sandbox.cuda.opt import register_opt from theano.sandbox.cuda.opt import register_opt
from theano.sandbox.cuda import CudaNdarrayType, HostFromGpu, host_from_gpu from theano.sandbox.cuda import (CudaNdarrayType, HostFromGpu,
host_from_gpu, GpuOp)
class GpuConvTransp3D(theano.Op): class GpuConvTransp3D(GpuOp):
""" The gpu version of ConvTransp3D """ """ The gpu version of ConvTransp3D """
def __eq__(self,other): def __eq__(self,other):
return type(self) == type(other) return type(self) == type(other)
......
...@@ -33,7 +33,20 @@ def as_cuda_array(obj): ...@@ -33,7 +33,20 @@ def as_cuda_array(obj):
else: else:
raise TypeError("Don't know how to cast to a CudaNdarray object") raise TypeError("Don't know how to cast to a CudaNdarray object")
class HostFromGpu(Op):
class GpuOp(Op):
def make_thunk(self, node, storage_map, compute_map, no_recycling):
if theano.sandbox.cuda.use.device_number is None:
theano.sandbox.cuda.use("gpu",
force=True,
default_to_move_computation_to_gpu=False,
move_shared_float32_to_gpu=False,
enable_cuda=False)
return super(GpuOp, self).make_thunk(node, storage_map,
compute_map, no_recycling)
class HostFromGpu(GpuOp):
""" """
Implement the transfer from gpu to the cpu. Implement the transfer from gpu to the cpu.
""" """
...@@ -65,7 +78,7 @@ class HostFromGpu(Op): ...@@ -65,7 +78,7 @@ class HostFromGpu(Op):
return xshp return xshp
host_from_gpu = HostFromGpu() host_from_gpu = HostFromGpu()
class GpuFromHost(Op): class GpuFromHost(GpuOp):
""" """
Implement the transfer from cpu to the gpu. Implement the transfer from cpu to the gpu.
""" """
...@@ -98,7 +111,8 @@ class GpuFromHost(Op): ...@@ -98,7 +111,8 @@ class GpuFromHost(Op):
return xshp return xshp
gpu_from_host = GpuFromHost() gpu_from_host = GpuFromHost()
class GpuElemwise(Op):
class GpuElemwise(GpuOp):
""" """
Implement a generic elemwise on the gpu. Implement a generic elemwise on the gpu.
""" """
...@@ -208,7 +222,7 @@ class GpuElemwise(Op): ...@@ -208,7 +222,7 @@ class GpuElemwise(Op):
def c_code_cache_version(self): def c_code_cache_version(self):
return self.src_generator.cache_version return self.src_generator.cache_version
class GpuDimShuffle(Op): class GpuDimShuffle(GpuOp):
""" """
Implement DimShuffle on the gpu. Implement DimShuffle on the gpu.
""" """
...@@ -397,7 +411,7 @@ class GpuDimShuffle(Op): ...@@ -397,7 +411,7 @@ class GpuDimShuffle(Op):
def c_code_cache_version(self): def c_code_cache_version(self):
return (1,0) return (1,0)
class GpuSum(Op): class GpuSum(GpuOp):
"""GpuSum is a Reduction along some dimensions by summation. """GpuSum is a Reduction along some dimensions by summation.
The dimensions along which to sum is specified by the `reduce_mask` that you pass to the The dimensions along which to sum is specified by the `reduce_mask` that you pass to the
...@@ -1717,7 +1731,7 @@ class GpuSum(Op): ...@@ -1717,7 +1731,7 @@ class GpuSum(Op):
""" %locals() """ %locals()
return sio.getvalue() return sio.getvalue()
class GpuReshape(tensor.Reshape): class GpuReshape(tensor.Reshape, GpuOp):
""" """
Implement Reshape on the gpu. Implement Reshape on the gpu.
""" """
...@@ -1733,7 +1747,7 @@ class GpuReshape(tensor.Reshape): ...@@ -1733,7 +1747,7 @@ class GpuReshape(tensor.Reshape):
', should be %i' % (len(shp), self.ndim), shp) ', should be %i' % (len(shp), self.ndim), shp)
out[0] = x.reshape(tuple(shp)) out[0] = x.reshape(tuple(shp))
class GpuSubtensor(tensor.Subtensor): class GpuSubtensor(tensor.Subtensor, GpuOp):
""" """
Implement subtensor on the gpu. Implement subtensor on the gpu.
""" """
...@@ -1764,7 +1778,7 @@ class GpuSubtensor(tensor.Subtensor): ...@@ -1764,7 +1778,7 @@ class GpuSubtensor(tensor.Subtensor):
cdata = cdata[0] cdata = cdata[0]
out[0] = x.__getitem__(cdata) out[0] = x.__getitem__(cdata)
class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1): class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1, GpuOp):
""" """
Implement AdvancedSubtensor1 on the gpu. Implement AdvancedSubtensor1 on the gpu.
""" """
...@@ -1790,7 +1804,7 @@ class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1): ...@@ -1790,7 +1804,7 @@ class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1):
o[j] = x[i] o[j] = x[i]
out[0] = o out[0] = o
class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1): class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):
""" """
Implement AdvancedIncSubtensor1 on the gpu. Implement AdvancedIncSubtensor1 on the gpu.
""" """
...@@ -1818,7 +1832,7 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1): ...@@ -1818,7 +1832,7 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1):
# CudaNdarray_Subscript() don't support Advanced slicing. # CudaNdarray_Subscript() don't support Advanced slicing.
# so we use the parent version that loop on each indices. # so we use the parent version that loop on each indices.
class GpuIncSubtensor(tensor.IncSubtensor): class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
""" """
Implement IncSubtensor on the gpu. Implement IncSubtensor on the gpu.
""" """
...@@ -1828,7 +1842,7 @@ class GpuIncSubtensor(tensor.IncSubtensor): ...@@ -1828,7 +1842,7 @@ class GpuIncSubtensor(tensor.IncSubtensor):
rval = tensor.IncSubtensor.make_node(self, x, y, *inputs) rval = tensor.IncSubtensor.make_node(self, x, y, *inputs)
return Apply(self, [x,y]+rval.inputs[2:], [x.type()]) return Apply(self, [x,y]+rval.inputs[2:], [x.type()])
class GpuFlatten(tensor.Flatten): class GpuFlatten(tensor.Flatten, GpuOp):
""" """
Implement Flatten on the gpu. Implement Flatten on the gpu.
""" """
...@@ -1839,7 +1853,7 @@ class GpuFlatten(tensor.Flatten): ...@@ -1839,7 +1853,7 @@ class GpuFlatten(tensor.Flatten):
out_type = CudaNdarrayType(broadcastable=host_out_broadcastable) out_type = CudaNdarrayType(broadcastable=host_out_broadcastable)
return Apply(self, [x], [out_type()]) return Apply(self, [x], [out_type()])
class GpuShape(tensor.Shape): class GpuShape(tensor.Shape, GpuOp):
""" """
Implement Shape on the gpu. Implement Shape on the gpu.
""" """
...@@ -1847,7 +1861,7 @@ class GpuShape(tensor.Shape): ...@@ -1847,7 +1861,7 @@ class GpuShape(tensor.Shape):
return Apply(self, [x], [tensor.lvector()]) return Apply(self, [x], [tensor.lvector()])
gpu_shape = GpuShape() gpu_shape = GpuShape()
class GpuJoin(tensor.Join): class GpuJoin(tensor.Join, GpuOp):
""" """
Implement Join on the gpu. Implement Join on the gpu.
""" """
...@@ -1924,7 +1938,7 @@ class GpuJoin(tensor.Join): ...@@ -1924,7 +1938,7 @@ class GpuJoin(tensor.Join):
gpu_join = GpuJoin() gpu_join = GpuJoin()
class GpuAlloc(Op): class GpuAlloc(GpuOp):
""" """
Implement Alloc on the gpu. Implement Alloc on the gpu.
""" """
...@@ -2023,7 +2037,7 @@ class GpuAlloc(Op): ...@@ -2023,7 +2037,7 @@ class GpuAlloc(Op):
gpu_alloc = GpuAlloc() gpu_alloc = GpuAlloc()
class GpuContiguous(Op): class GpuContiguous(GpuOp):
""" """
Always return a c contiguous output. Copy the input only if it is Always return a c contiguous output. Copy the input only if it is
not already c contiguous. not already c contiguous.
......
...@@ -4,8 +4,9 @@ import StringIO, os ...@@ -4,8 +4,9 @@ import StringIO, os
import cuda_ndarray.cuda_ndarray as cuda import cuda_ndarray.cuda_ndarray as cuda
from theano.sandbox.cuda.type import CudaNdarrayType from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda import GpuOp
class GpuDot22(Op): class GpuDot22(GpuOp):
""" """
Implement dot(2d, 2d) on the gpu. Implement dot(2d, 2d) on the gpu.
""" """
...@@ -76,7 +77,7 @@ class GpuDot22(Op): ...@@ -76,7 +77,7 @@ class GpuDot22(Op):
""" % locals() """ % locals()
gpu_dot22 = GpuDot22() gpu_dot22 = GpuDot22()
class GpuDot22Scalar(Op): class GpuDot22Scalar(GpuOp):
""" """
Implement dot(2d, 2d) * scalar on the gpu. Implement dot(2d, 2d) * scalar on the gpu.
""" """
...@@ -155,7 +156,7 @@ class GpuDot22Scalar(Op): ...@@ -155,7 +156,7 @@ class GpuDot22Scalar(Op):
""" % locals() """ % locals()
gpu_dot22scalar = GpuDot22Scalar() gpu_dot22scalar = GpuDot22Scalar()
class GpuGemm(Op): class GpuGemm(GpuOp):
""" """
implement the gemm on the gpu. implement the gemm on the gpu.
...@@ -257,7 +258,7 @@ class GpuGemm(Op): ...@@ -257,7 +258,7 @@ class GpuGemm(Op):
gpu_gemm_no_inplace = GpuGemm(inplace=False) gpu_gemm_no_inplace = GpuGemm(inplace=False)
gpu_gemm_inplace = GpuGemm(inplace=True) gpu_gemm_inplace = GpuGemm(inplace=True)
class GpuGemv(Op): class GpuGemv(GpuOp):
""" """
implement gemv on the gpu. implement gemv on the gpu.
...@@ -348,7 +349,7 @@ class GpuGemv(Op): ...@@ -348,7 +349,7 @@ class GpuGemv(Op):
gpu_gemv_no_inplace = GpuGemv(inplace=False) gpu_gemv_no_inplace = GpuGemv(inplace=False)
gpu_gemv_inplace = GpuGemv(inplace=True) gpu_gemv_inplace = GpuGemv(inplace=True)
class GpuGer(Op): class GpuGer(GpuOp):
""" """
implement ger on the gpu. implement ger on the gpu.
...@@ -439,7 +440,7 @@ class GpuGer(Op): ...@@ -439,7 +440,7 @@ class GpuGer(Op):
gpu_ger_no_inplace = GpuGer(inplace=False) gpu_ger_no_inplace = GpuGer(inplace=False)
gpu_ger_inplace = GpuGer(inplace=True) gpu_ger_inplace = GpuGer(inplace=True)
class GpuOuter(Op): class GpuOuter(GpuOp):
""" Implement outer on the gpu.""" """ Implement outer on the gpu."""
def make_node(self, x, y): def make_node(self, x, y):
# we suppose type checking has been done, but make sure. # we suppose type checking has been done, but make sure.
...@@ -532,7 +533,7 @@ gpu_outer = GpuOuter() ...@@ -532,7 +533,7 @@ gpu_outer = GpuOuter()
## ##
# Not really a BLAS operation, but whatever. # Not really a BLAS operation, but whatever.
# #
class GpuConv(Op): class GpuConv(GpuOp):
""" """
Implement the batched and stacked 2d convolution on the gpu. Implement the batched and stacked 2d convolution on the gpu.
""" """
...@@ -698,7 +699,7 @@ class GpuConv(Op): ...@@ -698,7 +699,7 @@ class GpuConv(Op):
"""%sub """%sub
class GpuDownsampleFactorMax(Op): class GpuDownsampleFactorMax(GpuOp):
""" """
Implement downsample with max on the gpu. Implement downsample with max on the gpu.
""" """
...@@ -858,7 +859,7 @@ class GpuDownsampleFactorMax(Op): ...@@ -858,7 +859,7 @@ class GpuDownsampleFactorMax(Op):
} }
""" % locals() """ % locals()
class GpuDownsampleFactorMaxGrad(Op): class GpuDownsampleFactorMaxGrad(GpuOp):
""" """
Implement the grad of downsample with max on the gpu. Implement the grad of downsample with max on the gpu.
""" """
......
...@@ -3,11 +3,12 @@ from theano import tensor, scalar ...@@ -3,11 +3,12 @@ from theano import tensor, scalar
import StringIO import StringIO
from theano.sandbox.cuda.type import CudaNdarrayType from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda import GpuOp
from theano.sandbox.cuda.kernel_codegen import nvcc_kernel, inline_reduce_max, inline_reduce_sum, inline_softmax from theano.sandbox.cuda.kernel_codegen import nvcc_kernel, inline_reduce_max, inline_reduce_sum, inline_softmax
class GpuCrossentropySoftmaxArgmax1HotWithBias (Op): class GpuCrossentropySoftmaxArgmax1HotWithBias (GpuOp):
""" """
Implement CrossentropySoftmaxArgmax1HotWithBias on the gpu. Implement CrossentropySoftmaxArgmax1HotWithBias on the gpu.
""" """
...@@ -180,7 +181,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias (Op): ...@@ -180,7 +181,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias (Op):
gpu_crossentropy_softmax_argmax_1hot_with_bias = GpuCrossentropySoftmaxArgmax1HotWithBias() gpu_crossentropy_softmax_argmax_1hot_with_bias = GpuCrossentropySoftmaxArgmax1HotWithBias()
class GpuCrossentropySoftmax1HotWithBiasDx (Op): class GpuCrossentropySoftmax1HotWithBiasDx (GpuOp):
""" """
Implement CrossentropySoftmax1HotWithBiasDx on the gpu. Implement CrossentropySoftmax1HotWithBiasDx on the gpu.
""" """
...@@ -302,7 +303,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx (Op): ...@@ -302,7 +303,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx (Op):
gpu_crossentropy_softmax_1hot_with_bias_dx = GpuCrossentropySoftmax1HotWithBiasDx() gpu_crossentropy_softmax_1hot_with_bias_dx = GpuCrossentropySoftmax1HotWithBiasDx()
class GpuSoftmax (Op): class GpuSoftmax (GpuOp):
""" """
Implement Softmax on the gpu. Implement Softmax on the gpu.
""" """
...@@ -400,7 +401,7 @@ class GpuSoftmax (Op): ...@@ -400,7 +401,7 @@ class GpuSoftmax (Op):
gpu_softmax = GpuSoftmax() gpu_softmax = GpuSoftmax()
class GpuSoftmaxWithBias (Op): class GpuSoftmaxWithBias (GpuOp):
""" """
Implement SoftmaxWithBias on the gpu. Implement SoftmaxWithBias on the gpu.
""" """
......
...@@ -10,7 +10,7 @@ __contact__ = "theano-dev@googlegroups.com" ...@@ -10,7 +10,7 @@ __contact__ = "theano-dev@googlegroups.com"
import sys import sys
import numpy import numpy
import theano.gof import theano.gof
from theano.sandbox.cuda import CudaNdarrayType from theano.sandbox.cuda import CudaNdarrayType, GpuOp
from theano.tensor import (get_vector_length, cast, opt) from theano.tensor import (get_vector_length, cast, opt)
from theano.compile import optdb from theano.compile import optdb
from theano.gof import local_optimizer, Variable from theano.gof import local_optimizer, Variable
...@@ -19,7 +19,7 @@ from theano.gof import local_optimizer, Variable ...@@ -19,7 +19,7 @@ from theano.gof import local_optimizer, Variable
config = theano.config config = theano.config
class CURAND_Base(theano.gof.Op): class CURAND_Base(GpuOp):
""" Base class for a random number generator implemented in CURAND. """ Base class for a random number generator implemented in CURAND.
The random number generator itself is an opaque reference managed by The random number generator itself is an opaque reference managed by
......
import numpy
import theano
# Skip test if cuda_ndarray is not available.
from nose.plugins.skip import SkipTest
import theano.sandbox.cuda as cuda_ndarray
# Skip the whole module when CUDA support is not compiled/available.
# Use truthiness rather than the non-idiomatic `== False` comparison (PEP 8).
if not cuda_ndarray.cuda_available:
    raise SkipTest('Optional package cuda disabled')
import theano.sandbox.cuda as cuda
import theano.sandbox.cuda.basic_ops as B
# Compilation mode used by the tests below: the configured default mode
# (or FAST_RUN when the config asks for FAST_COMPILE, which cannot run
# the gpu optimizations), with the gpu optimizations enabled.
_base_mode = (theano.compile.mode.get_mode('FAST_RUN')
              if theano.config.mode == 'FAST_COMPILE'
              else theano.compile.mode.get_default_mode())
mode_with_gpu = _base_mode.including('gpu')
def test_nvidia_driver1():
    """Check that the installed nvidia driver computes reductions correctly.

    Some nvidia drivers give bad results for reductions.  This compiles a
    GPU sum over 10000 float32 values and compares the result against the
    numpy reference; it raises with installation advice if they differ.

    Raises
    ------
    Exception
        If the GPU reduction does not match numpy's result, which indicates
        a buggy driver version.
    """
    a = numpy.random.rand(10000).astype("float32")
    A = cuda.shared_constructor(a)
    f = theano.function(inputs=[], outputs=A.sum(), mode=mode_with_gpu)
    topo = f.maker.env.toposort()
    # The compiled graph must be exactly a GpuSum followed by the
    # transfer of the result back to the host.
    assert len(topo) == 2
    assert sum(isinstance(node.op, B.GpuSum) for node in topo) == 1
    if not numpy.allclose(f(), a.sum()):
        # Fixed message: the original implicit string concatenation was
        # missing a space ("reduction.Installing") and had grammar errors.
        raise Exception("The nvidia driver version installed with the OS "
                        "doesn't give good results for reduction. "
                        "Installing the nvidia driver available on the same "
                        "download page as the cuda package will fix the "
                        "problem: http://developer.nvidia.com/cuda-downloads")
def test_nvidia_driver2():
    """Check that theano initializes the gpu device when a shared
    variable is manually created on the gpu.

    The driver should always be tested during theano's initialization
    of the gpu device.
    """
    data = numpy.random.rand(10000).astype("float32")
    cuda.shared_constructor(data)
    # Creating the shared variable must have triggered device init.
    assert theano.sandbox.cuda.use.device_number is not None
def test_nvidia_driver3():
    """Check that theano initializes the gpu device when compiling a
    function that contains a gpu op.

    The driver should always be tested during theano's initialization
    of the gpu device.
    """
    inp = cuda.fvector()
    fn = theano.function([inp], inp + 1, mode=mode_with_gpu)
    nodes = fn.maker.env.toposort()
    # The elemwise addition must have been moved to the gpu.
    assert any(isinstance(node.op, cuda.GpuElemwise) for node in nodes)
    # Compiling the function must have triggered device init.
    assert theano.sandbox.cuda.use.device_number is not None
# TODO: make sure the test_nvidia_driver tests are executed when we manually
# create a CudaNdarray like this: cuda.CudaNdarray.zeros((5, 4))
...@@ -169,6 +169,12 @@ def cuda_shared_constructor(value, name=None, strict=False, ...@@ -169,6 +169,12 @@ def cuda_shared_constructor(value, name=None, strict=False,
def float32_shared_constructor(value, name=None, strict=False, def float32_shared_constructor(value, name=None, strict=False,
allow_downcast=None, borrow=False, broadcastable=None): allow_downcast=None, borrow=False, broadcastable=None):
"""SharedVariable Constructor for CudaNdarrayType from numpy.ndarray or CudaNdarray""" """SharedVariable Constructor for CudaNdarrayType from numpy.ndarray or CudaNdarray"""
if theano.sandbox.cuda.use.device_number is None:
theano.sandbox.cuda.use("gpu",
force=True,
default_to_move_computation_to_gpu=False,
move_shared_float32_to_gpu=False,
enable_cuda=False)
# if value isn't a float32 ndarray, or a CudaNdarray then raise # if value isn't a float32 ndarray, or a CudaNdarray then raise
......
...@@ -5,7 +5,7 @@ from theano.gof import local_optimizer ...@@ -5,7 +5,7 @@ from theano.gof import local_optimizer
from theano.sandbox.cuda import cuda_available from theano.sandbox.cuda import cuda_available
if cuda_available: if cuda_available:
from theano.sandbox.cuda import CudaNdarrayType from theano.sandbox.cuda import CudaNdarrayType, GpuOp
from theano.sandbox.cuda.basic_ops import host_from_gpu, gpu_from_host from theano.sandbox.cuda.basic_ops import host_from_gpu, gpu_from_host
from theano.sandbox.cuda.opt import register_opt from theano.sandbox.cuda.opt import register_opt
...@@ -120,7 +120,7 @@ class MultinomialFromUniform(Op): ...@@ -120,7 +120,7 @@ class MultinomialFromUniform(Op):
""" % locals() """ % locals()
class GpuMultinomialFromUniform(MultinomialFromUniform): class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp):
""" """
The output is transposed compared to MultinomialFromUniform. The output is transposed compared to MultinomialFromUniform.
We must insert a Transpose op after it. We must insert a Transpose op after it.
......
...@@ -5,7 +5,7 @@ from theano.gof import local_optimizer ...@@ -5,7 +5,7 @@ from theano.gof import local_optimizer
from theano.sandbox.cuda import cuda_available from theano.sandbox.cuda import cuda_available
if cuda_available: if cuda_available:
from theano.sandbox.cuda import CudaNdarrayType from theano.sandbox.cuda import CudaNdarrayType, GpuOp
from theano.sandbox.cuda.basic_ops import host_from_gpu, gpu_from_host from theano.sandbox.cuda.basic_ops import host_from_gpu, gpu_from_host
from theano.sandbox.cuda.opt import register_opt as register_gpu_opt from theano.sandbox.cuda.opt import register_opt as register_gpu_opt
...@@ -292,7 +292,7 @@ def neibs2images(neibs, neib_shape, original_shape, mode='valid'): ...@@ -292,7 +292,7 @@ def neibs2images(neibs, neib_shape, original_shape, mode='valid'):
# This is work in progress # This is work in progress
class GpuImages2Neibs(Images2Neibs): class GpuImages2Neibs(Images2Neibs, GpuOp):
def __init__(self, mode='valid'): def __init__(self, mode='valid'):
if mode not in ['valid', 'wrap_centered']: if mode not in ['valid', 'wrap_centered']:
raise NotImplementedError("Only the mode valid and wrap_centered" raise NotImplementedError("Only the mode valid and wrap_centered"
......
...@@ -20,7 +20,10 @@ import multinomial ...@@ -20,7 +20,10 @@ import multinomial
from theano.sandbox.cuda import cuda_available, cuda_enabled from theano.sandbox.cuda import cuda_available, cuda_enabled
if cuda_available: if cuda_available:
from theano.sandbox.cuda import CudaNdarrayType, float32_shared_constructor from theano.sandbox.cuda import (CudaNdarrayType,
float32_shared_constructor,
GpuOp)
def mulmod(a, b, c, m): def mulmod(a, b, c, m):
r = numpy.int32((numpy.int64(a)*b + c) % m) r = numpy.int32((numpy.int64(a)*b + c) % m)
...@@ -372,7 +375,7 @@ class mrg_uniform(mrg_uniform_base): ...@@ -372,7 +375,7 @@ class mrg_uniform(mrg_uniform_base):
def c_code_cache_version(self): def c_code_cache_version(self):
return (1,) return (1,)
class GPU_mrg_uniform(mrg_uniform_base): class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
#GPU VERSION #GPU VERSION
@classmethod @classmethod
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论