提交 b7bc0916 authored 作者: Frederic's avatar Frederic

Add a GpuOp base class and make all ops that use the GPU inherit from it.

This allows making sure that the driver check has been executed before any GPU op is compiled.
上级 8333fe82
...@@ -7,9 +7,9 @@ from theano.sandbox.cuda.basic_ops import as_cuda_ndarray_variable, host_from_gp ...@@ -7,9 +7,9 @@ from theano.sandbox.cuda.basic_ops import as_cuda_ndarray_variable, host_from_gp
from theano.misc import strutil from theano.misc import strutil
from theano.tensor.nnet.Conv3D import Conv3D from theano.tensor.nnet.Conv3D import Conv3D
from theano.sandbox.cuda.opt import register_opt from theano.sandbox.cuda.opt import register_opt
from theano.sandbox.cuda import CudaNdarrayType from theano.sandbox.cuda import CudaNdarrayType, GpuOp
class GpuConv3D(theano.Op): class GpuConv3D(GpuOp):
""" GPU implementation of Conv3D """ """ GPU implementation of Conv3D """
def __eq__(self, other): def __eq__(self, other):
......
...@@ -8,11 +8,12 @@ from theano.misc import strutil ...@@ -8,11 +8,12 @@ from theano.misc import strutil
from theano.tensor.nnet.ConvGrad3D import ConvGrad3D from theano.tensor.nnet.ConvGrad3D import ConvGrad3D
from theano.sandbox.cuda.opt import register_opt from theano.sandbox.cuda.opt import register_opt
from theano.sandbox.cuda import CudaNdarrayType, HostFromGpu, host_from_gpu from theano.sandbox.cuda import (CudaNdarrayType, HostFromGpu,
host_from_gpu, GpuOp)
class GpuConvGrad3D(theano.Op): class GpuConvGrad3D(GpuOp):
""" GPU version of gradient of ConvGrad3D with respect to W """ """ GPU version of gradient of ConvGrad3D with respect to W """
def make_node(self, V, d, WShape, dCdH): def make_node(self, V, d, WShape, dCdH):
......
...@@ -9,10 +9,11 @@ from theano.gof import local_optimizer ...@@ -9,10 +9,11 @@ from theano.gof import local_optimizer
from theano.sandbox.cuda.basic_ops import as_cuda_ndarray_variable from theano.sandbox.cuda.basic_ops import as_cuda_ndarray_variable
from theano.sandbox.cuda.opt import register_opt from theano.sandbox.cuda.opt import register_opt
from theano.sandbox.cuda import CudaNdarrayType, HostFromGpu, host_from_gpu from theano.sandbox.cuda import (CudaNdarrayType, HostFromGpu,
host_from_gpu, GpuOp)
class GpuConvTransp3D(theano.Op): class GpuConvTransp3D(GpuOp):
""" The gpu version of ConvTransp3D """ """ The gpu version of ConvTransp3D """
def __eq__(self,other): def __eq__(self,other):
return type(self) == type(other) return type(self) == type(other)
......
...@@ -173,7 +173,7 @@ if cuda_available: ...@@ -173,7 +173,7 @@ if cuda_available:
shared_constructor = float32_shared_constructor shared_constructor = float32_shared_constructor
import basic_ops import basic_ops
from basic_ops import (GpuFromHost, HostFromGpu, GpuElemwise, from basic_ops import (GpuOp, GpuFromHost, HostFromGpu, GpuElemwise,
GpuDimShuffle, GpuSum, GpuReshape, GpuContiguous, GpuDimShuffle, GpuSum, GpuReshape, GpuContiguous,
GpuSubtensor, GpuIncSubtensor, GpuSubtensor, GpuIncSubtensor,
GpuAdvancedSubtensor1, GpuAdvancedIncSubtensor1, GpuAdvancedSubtensor1, GpuAdvancedIncSubtensor1,
......
...@@ -33,7 +33,20 @@ def as_cuda_array(obj): ...@@ -33,7 +33,20 @@ def as_cuda_array(obj):
else: else:
raise TypeError("Don't know how to cast to a CudaNdarray object") raise TypeError("Don't know how to cast to a CudaNdarray object")
class HostFromGpu(Op):
class GpuOp(Op):
    """Base class for every op that runs on the GPU.

    Per the commit message, inheriting from this class is meant to make
    sure the CUDA driver check/initialization has been executed before
    the op is actually used.
    """

    def make_thunk(self, node, storage_map, compute_map, no_recycling):
        # If no CUDA device has been selected yet, initialize one now.
        # The flags keep this a bare driver initialization: computation
        # is not moved to the GPU, shared float32 variables are left on
        # the host, and CUDA is not globally enabled as a side effect.
        if theano.sandbox.cuda.use.device_number is None:
            theano.sandbox.cuda.use("gpu",
                                    force=True,
                                    default_to_move_computation_to_gpu=False,
                                    move_shared_float32_to_gpu=False,
                                    enable_cuda=False)
        # Delegate the actual thunk creation to the regular Op machinery.
        return super(GpuOp, self).make_thunk(node, storage_map,
                                             compute_map, no_recycling)
class HostFromGpu(GpuOp):
""" """
Implement the transfer from gpu to the cpu. Implement the transfer from gpu to the cpu.
""" """
...@@ -65,7 +78,7 @@ class HostFromGpu(Op): ...@@ -65,7 +78,7 @@ class HostFromGpu(Op):
return xshp return xshp
host_from_gpu = HostFromGpu() host_from_gpu = HostFromGpu()
class GpuFromHost(Op): class GpuFromHost(GpuOp):
""" """
Implement the transfer from cpu to the gpu. Implement the transfer from cpu to the gpu.
""" """
...@@ -98,7 +111,8 @@ class GpuFromHost(Op): ...@@ -98,7 +111,8 @@ class GpuFromHost(Op):
return xshp return xshp
gpu_from_host = GpuFromHost() gpu_from_host = GpuFromHost()
class GpuElemwise(Op):
class GpuElemwise(GpuOp):
""" """
Implement a generic elemwise on the gpu. Implement a generic elemwise on the gpu.
""" """
...@@ -208,7 +222,7 @@ class GpuElemwise(Op): ...@@ -208,7 +222,7 @@ class GpuElemwise(Op):
def c_code_cache_version(self): def c_code_cache_version(self):
return self.src_generator.cache_version return self.src_generator.cache_version
class GpuDimShuffle(Op): class GpuDimShuffle(GpuOp):
""" """
Implement DimShuffle on the gpu. Implement DimShuffle on the gpu.
""" """
...@@ -397,7 +411,7 @@ class GpuDimShuffle(Op): ...@@ -397,7 +411,7 @@ class GpuDimShuffle(Op):
def c_code_cache_version(self): def c_code_cache_version(self):
return (1,0) return (1,0)
class GpuSum(Op): class GpuSum(GpuOp):
"""GpuSum is a Reduction along some dimensions by summation. """GpuSum is a Reduction along some dimensions by summation.
The dimensions along which to sum is specified by the `reduce_mask` that you pass to the The dimensions along which to sum is specified by the `reduce_mask` that you pass to the
...@@ -1717,7 +1731,7 @@ class GpuSum(Op): ...@@ -1717,7 +1731,7 @@ class GpuSum(Op):
""" %locals() """ %locals()
return sio.getvalue() return sio.getvalue()
class GpuReshape(tensor.Reshape): class GpuReshape(tensor.Reshape, GpuOp):
""" """
Implement Reshape on the gpu. Implement Reshape on the gpu.
""" """
...@@ -1733,7 +1747,7 @@ class GpuReshape(tensor.Reshape): ...@@ -1733,7 +1747,7 @@ class GpuReshape(tensor.Reshape):
', should be %i' % (len(shp), self.ndim), shp) ', should be %i' % (len(shp), self.ndim), shp)
out[0] = x.reshape(tuple(shp)) out[0] = x.reshape(tuple(shp))
class GpuSubtensor(tensor.Subtensor): class GpuSubtensor(tensor.Subtensor, GpuOp):
""" """
Implement subtensor on the gpu. Implement subtensor on the gpu.
""" """
...@@ -1764,7 +1778,7 @@ class GpuSubtensor(tensor.Subtensor): ...@@ -1764,7 +1778,7 @@ class GpuSubtensor(tensor.Subtensor):
cdata = cdata[0] cdata = cdata[0]
out[0] = x.__getitem__(cdata) out[0] = x.__getitem__(cdata)
class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1): class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1, GpuOp):
""" """
Implement AdvancedSubtensor1 on the gpu. Implement AdvancedSubtensor1 on the gpu.
""" """
...@@ -1790,7 +1804,7 @@ class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1): ...@@ -1790,7 +1804,7 @@ class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1):
o[j] = x[i] o[j] = x[i]
out[0] = o out[0] = o
class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1): class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):
""" """
Implement AdvancedIncSubtensor1 on the gpu. Implement AdvancedIncSubtensor1 on the gpu.
""" """
...@@ -1818,7 +1832,7 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1): ...@@ -1818,7 +1832,7 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1):
# CudaNdarray_Subscript() don't support Advanced slicing. # CudaNdarray_Subscript() don't support Advanced slicing.
# so we use the parent version that loop on each indices. # so we use the parent version that loop on each indices.
class GpuIncSubtensor(tensor.IncSubtensor): class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
""" """
Implement IncSubtensor on the gpu. Implement IncSubtensor on the gpu.
""" """
...@@ -1828,7 +1842,7 @@ class GpuIncSubtensor(tensor.IncSubtensor): ...@@ -1828,7 +1842,7 @@ class GpuIncSubtensor(tensor.IncSubtensor):
rval = tensor.IncSubtensor.make_node(self, x, y, *inputs) rval = tensor.IncSubtensor.make_node(self, x, y, *inputs)
return Apply(self, [x,y]+rval.inputs[2:], [x.type()]) return Apply(self, [x,y]+rval.inputs[2:], [x.type()])
class GpuFlatten(tensor.Flatten): class GpuFlatten(tensor.Flatten, GpuOp):
""" """
Implement Flatten on the gpu. Implement Flatten on the gpu.
""" """
...@@ -1839,7 +1853,7 @@ class GpuFlatten(tensor.Flatten): ...@@ -1839,7 +1853,7 @@ class GpuFlatten(tensor.Flatten):
out_type = CudaNdarrayType(broadcastable=host_out_broadcastable) out_type = CudaNdarrayType(broadcastable=host_out_broadcastable)
return Apply(self, [x], [out_type()]) return Apply(self, [x], [out_type()])
class GpuShape(tensor.Shape): class GpuShape(tensor.Shape, GpuOp):
""" """
Implement Shape on the gpu. Implement Shape on the gpu.
""" """
...@@ -1847,7 +1861,7 @@ class GpuShape(tensor.Shape): ...@@ -1847,7 +1861,7 @@ class GpuShape(tensor.Shape):
return Apply(self, [x], [tensor.lvector()]) return Apply(self, [x], [tensor.lvector()])
gpu_shape = GpuShape() gpu_shape = GpuShape()
class GpuJoin(tensor.Join): class GpuJoin(tensor.Join, GpuOp):
""" """
Implement Join on the gpu. Implement Join on the gpu.
""" """
...@@ -1924,7 +1938,7 @@ class GpuJoin(tensor.Join): ...@@ -1924,7 +1938,7 @@ class GpuJoin(tensor.Join):
gpu_join = GpuJoin() gpu_join = GpuJoin()
class GpuAlloc(Op): class GpuAlloc(GpuOp):
""" """
Implement Alloc on the gpu. Implement Alloc on the gpu.
""" """
...@@ -2023,7 +2037,7 @@ class GpuAlloc(Op): ...@@ -2023,7 +2037,7 @@ class GpuAlloc(Op):
gpu_alloc = GpuAlloc() gpu_alloc = GpuAlloc()
class GpuContiguous(Op): class GpuContiguous(GpuOp):
""" """
Always return a c contiguous output. Copy the input only if it is Always return a c contiguous output. Copy the input only if it is
not already c contiguous. not already c contiguous.
......
...@@ -4,8 +4,9 @@ import StringIO, os ...@@ -4,8 +4,9 @@ import StringIO, os
import cuda_ndarray.cuda_ndarray as cuda import cuda_ndarray.cuda_ndarray as cuda
from theano.sandbox.cuda.type import CudaNdarrayType from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda import GpuOp
class GpuDot22(Op): class GpuDot22(GpuOp):
""" """
Implement dot(2d, 2d) on the gpu. Implement dot(2d, 2d) on the gpu.
""" """
...@@ -76,7 +77,7 @@ class GpuDot22(Op): ...@@ -76,7 +77,7 @@ class GpuDot22(Op):
""" % locals() """ % locals()
gpu_dot22 = GpuDot22() gpu_dot22 = GpuDot22()
class GpuDot22Scalar(Op): class GpuDot22Scalar(GpuOp):
""" """
Implement dot(2d, 2d) * scalar on the gpu. Implement dot(2d, 2d) * scalar on the gpu.
""" """
...@@ -155,7 +156,7 @@ class GpuDot22Scalar(Op): ...@@ -155,7 +156,7 @@ class GpuDot22Scalar(Op):
""" % locals() """ % locals()
gpu_dot22scalar = GpuDot22Scalar() gpu_dot22scalar = GpuDot22Scalar()
class GpuGemm(Op): class GpuGemm(GpuOp):
""" """
implement the gemm on the gpu. implement the gemm on the gpu.
...@@ -257,7 +258,7 @@ class GpuGemm(Op): ...@@ -257,7 +258,7 @@ class GpuGemm(Op):
gpu_gemm_no_inplace = GpuGemm(inplace=False) gpu_gemm_no_inplace = GpuGemm(inplace=False)
gpu_gemm_inplace = GpuGemm(inplace=True) gpu_gemm_inplace = GpuGemm(inplace=True)
class GpuGemv(Op): class GpuGemv(GpuOp):
""" """
implement gemv on the gpu. implement gemv on the gpu.
...@@ -348,7 +349,7 @@ class GpuGemv(Op): ...@@ -348,7 +349,7 @@ class GpuGemv(Op):
gpu_gemv_no_inplace = GpuGemv(inplace=False) gpu_gemv_no_inplace = GpuGemv(inplace=False)
gpu_gemv_inplace = GpuGemv(inplace=True) gpu_gemv_inplace = GpuGemv(inplace=True)
class GpuGer(Op): class GpuGer(GpuOp):
""" """
implement ger on the gpu. implement ger on the gpu.
...@@ -439,7 +440,7 @@ class GpuGer(Op): ...@@ -439,7 +440,7 @@ class GpuGer(Op):
gpu_ger_no_inplace = GpuGer(inplace=False) gpu_ger_no_inplace = GpuGer(inplace=False)
gpu_ger_inplace = GpuGer(inplace=True) gpu_ger_inplace = GpuGer(inplace=True)
class GpuOuter(Op): class GpuOuter(GpuOp):
""" Implement outer on the gpu.""" """ Implement outer on the gpu."""
def make_node(self, x, y): def make_node(self, x, y):
# we suppose type checking has been done, but make sure. # we suppose type checking has been done, but make sure.
...@@ -532,7 +533,7 @@ gpu_outer = GpuOuter() ...@@ -532,7 +533,7 @@ gpu_outer = GpuOuter()
## ##
# Not really a BLAS operation, but whatever. # Not really a BLAS operation, but whatever.
# #
class GpuConv(Op): class GpuConv(GpuOp):
""" """
Implement the batched and stacked 2d convolution on the gpu. Implement the batched and stacked 2d convolution on the gpu.
""" """
...@@ -698,7 +699,7 @@ class GpuConv(Op): ...@@ -698,7 +699,7 @@ class GpuConv(Op):
"""%sub """%sub
class GpuDownsampleFactorMax(Op): class GpuDownsampleFactorMax(GpuOp):
""" """
Implement downsample with max on the gpu. Implement downsample with max on the gpu.
""" """
...@@ -858,7 +859,7 @@ class GpuDownsampleFactorMax(Op): ...@@ -858,7 +859,7 @@ class GpuDownsampleFactorMax(Op):
} }
""" % locals() """ % locals()
class GpuDownsampleFactorMaxGrad(Op): class GpuDownsampleFactorMaxGrad(GpuOp):
""" """
Implement the grad of downsample with max on the gpu. Implement the grad of downsample with max on the gpu.
""" """
......
...@@ -3,11 +3,12 @@ from theano import tensor, scalar ...@@ -3,11 +3,12 @@ from theano import tensor, scalar
import StringIO import StringIO
from theano.sandbox.cuda.type import CudaNdarrayType from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda import GpuOp
from theano.sandbox.cuda.kernel_codegen import nvcc_kernel, inline_reduce_max, inline_reduce_sum, inline_softmax from theano.sandbox.cuda.kernel_codegen import nvcc_kernel, inline_reduce_max, inline_reduce_sum, inline_softmax
class GpuCrossentropySoftmaxArgmax1HotWithBias (Op): class GpuCrossentropySoftmaxArgmax1HotWithBias (GpuOp):
""" """
Implement CrossentropySoftmaxArgmax1HotWithBias on the gpu. Implement CrossentropySoftmaxArgmax1HotWithBias on the gpu.
""" """
...@@ -180,7 +181,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias (Op): ...@@ -180,7 +181,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias (Op):
gpu_crossentropy_softmax_argmax_1hot_with_bias = GpuCrossentropySoftmaxArgmax1HotWithBias() gpu_crossentropy_softmax_argmax_1hot_with_bias = GpuCrossentropySoftmaxArgmax1HotWithBias()
class GpuCrossentropySoftmax1HotWithBiasDx (Op): class GpuCrossentropySoftmax1HotWithBiasDx (GpuOp):
""" """
Implement CrossentropySoftmax1HotWithBiasDx on the gpu. Implement CrossentropySoftmax1HotWithBiasDx on the gpu.
""" """
...@@ -302,7 +303,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx (Op): ...@@ -302,7 +303,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx (Op):
gpu_crossentropy_softmax_1hot_with_bias_dx = GpuCrossentropySoftmax1HotWithBiasDx() gpu_crossentropy_softmax_1hot_with_bias_dx = GpuCrossentropySoftmax1HotWithBiasDx()
class GpuSoftmax (Op): class GpuSoftmax (GpuOp):
""" """
Implement Softmax on the gpu. Implement Softmax on the gpu.
""" """
...@@ -400,7 +401,7 @@ class GpuSoftmax (Op): ...@@ -400,7 +401,7 @@ class GpuSoftmax (Op):
gpu_softmax = GpuSoftmax() gpu_softmax = GpuSoftmax()
class GpuSoftmaxWithBias (Op): class GpuSoftmaxWithBias (GpuOp):
""" """
Implement SoftmaxWithBias on the gpu. Implement SoftmaxWithBias on the gpu.
""" """
......
...@@ -10,7 +10,7 @@ __contact__ = "theano-dev@googlegroups.com" ...@@ -10,7 +10,7 @@ __contact__ = "theano-dev@googlegroups.com"
import sys import sys
import numpy import numpy
import theano.gof import theano.gof
from theano.sandbox.cuda import CudaNdarrayType from theano.sandbox.cuda import CudaNdarrayType, GpuOp
from theano.tensor import (get_vector_length, cast, opt) from theano.tensor import (get_vector_length, cast, opt)
from theano.compile import optdb from theano.compile import optdb
from theano.gof import local_optimizer, Variable from theano.gof import local_optimizer, Variable
...@@ -19,7 +19,7 @@ from theano.gof import local_optimizer, Variable ...@@ -19,7 +19,7 @@ from theano.gof import local_optimizer, Variable
config = theano.config config = theano.config
class CURAND_Base(theano.gof.Op): class CURAND_Base(GpuOp):
""" Base class for a random number generator implemented in CURAND. """ Base class for a random number generator implemented in CURAND.
The random number generator itself is an opaque reference managed by The random number generator itself is an opaque reference managed by
......
...@@ -5,7 +5,7 @@ from theano.gof import local_optimizer ...@@ -5,7 +5,7 @@ from theano.gof import local_optimizer
from theano.sandbox.cuda import cuda_available from theano.sandbox.cuda import cuda_available
if cuda_available: if cuda_available:
from theano.sandbox.cuda import CudaNdarrayType from theano.sandbox.cuda import CudaNdarrayType, GpuOp
from theano.sandbox.cuda.basic_ops import host_from_gpu, gpu_from_host from theano.sandbox.cuda.basic_ops import host_from_gpu, gpu_from_host
from theano.sandbox.cuda.opt import register_opt from theano.sandbox.cuda.opt import register_opt
...@@ -120,7 +120,7 @@ class MultinomialFromUniform(Op): ...@@ -120,7 +120,7 @@ class MultinomialFromUniform(Op):
""" % locals() """ % locals()
class GpuMultinomialFromUniform(MultinomialFromUniform): class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp):
""" """
The output is transposed compared to MultinomialFromUniform. The output is transposed compared to MultinomialFromUniform.
We must insert a Transpose op after it. We must insert a Transpose op after it.
......
...@@ -5,7 +5,7 @@ from theano.gof import local_optimizer ...@@ -5,7 +5,7 @@ from theano.gof import local_optimizer
from theano.sandbox.cuda import cuda_available from theano.sandbox.cuda import cuda_available
if cuda_available: if cuda_available:
from theano.sandbox.cuda import CudaNdarrayType from theano.sandbox.cuda import CudaNdarrayType, GpuOp
from theano.sandbox.cuda.basic_ops import host_from_gpu, gpu_from_host from theano.sandbox.cuda.basic_ops import host_from_gpu, gpu_from_host
from theano.sandbox.cuda.opt import register_opt as register_gpu_opt from theano.sandbox.cuda.opt import register_opt as register_gpu_opt
...@@ -292,7 +292,7 @@ def neibs2images(neibs, neib_shape, original_shape, mode='valid'): ...@@ -292,7 +292,7 @@ def neibs2images(neibs, neib_shape, original_shape, mode='valid'):
# This is work in progress # This is work in progress
class GpuImages2Neibs(Images2Neibs): class GpuImages2Neibs(Images2Neibs, GpuOp):
def __init__(self, mode='valid'): def __init__(self, mode='valid'):
if mode not in ['valid', 'wrap_centered']: if mode not in ['valid', 'wrap_centered']:
raise NotImplementedError("Only the mode valid and wrap_centered" raise NotImplementedError("Only the mode valid and wrap_centered"
......
...@@ -20,7 +20,10 @@ import multinomial ...@@ -20,7 +20,10 @@ import multinomial
from theano.sandbox.cuda import cuda_available, cuda_enabled from theano.sandbox.cuda import cuda_available, cuda_enabled
if cuda_available: if cuda_available:
from theano.sandbox.cuda import CudaNdarrayType, float32_shared_constructor from theano.sandbox.cuda import (CudaNdarrayType,
float32_shared_constructor,
GpuOp)
def mulmod(a, b, c, m): def mulmod(a, b, c, m):
r = numpy.int32((numpy.int64(a)*b + c) % m) r = numpy.int32((numpy.int64(a)*b + c) % m)
...@@ -372,7 +375,7 @@ class mrg_uniform(mrg_uniform_base): ...@@ -372,7 +375,7 @@ class mrg_uniform(mrg_uniform_base):
def c_code_cache_version(self): def c_code_cache_version(self):
return (1,) return (1,)
class GPU_mrg_uniform(mrg_uniform_base): class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
#GPU VERSION #GPU VERSION
@classmethod @classmethod
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论