Commit b7bc0916, authored by Frederic

Make a GpuOp and make all op that use the GPU inherit from it.

This allow to make sure the test for the driver was executed.
Parent: 8333fe82
......@@ -7,9 +7,9 @@ from theano.sandbox.cuda.basic_ops import as_cuda_ndarray_variable, host_from_gp
from theano.misc import strutil
from theano.tensor.nnet.Conv3D import Conv3D
from theano.sandbox.cuda.opt import register_opt
from theano.sandbox.cuda import CudaNdarrayType
from theano.sandbox.cuda import CudaNdarrayType, GpuOp
class GpuConv3D(theano.Op):
class GpuConv3D(GpuOp):
""" GPU implementation of Conv3D """
def __eq__(self, other):
......
......@@ -8,11 +8,12 @@ from theano.misc import strutil
from theano.tensor.nnet.ConvGrad3D import ConvGrad3D
from theano.sandbox.cuda.opt import register_opt
from theano.sandbox.cuda import CudaNdarrayType, HostFromGpu, host_from_gpu
from theano.sandbox.cuda import (CudaNdarrayType, HostFromGpu,
host_from_gpu, GpuOp)
class GpuConvGrad3D(theano.Op):
class GpuConvGrad3D(GpuOp):
""" GPU version of gradient of ConvGrad3D with respect to W """
def make_node(self, V, d, WShape, dCdH):
......
......@@ -9,10 +9,11 @@ from theano.gof import local_optimizer
from theano.sandbox.cuda.basic_ops import as_cuda_ndarray_variable
from theano.sandbox.cuda.opt import register_opt
from theano.sandbox.cuda import CudaNdarrayType, HostFromGpu, host_from_gpu
from theano.sandbox.cuda import (CudaNdarrayType, HostFromGpu,
host_from_gpu, GpuOp)
class GpuConvTransp3D(theano.Op):
class GpuConvTransp3D(GpuOp):
""" The gpu version of ConvTransp3D """
def __eq__(self,other):
return type(self) == type(other)
......
......@@ -173,7 +173,7 @@ if cuda_available:
shared_constructor = float32_shared_constructor
import basic_ops
from basic_ops import (GpuFromHost, HostFromGpu, GpuElemwise,
from basic_ops import (GpuOp, GpuFromHost, HostFromGpu, GpuElemwise,
GpuDimShuffle, GpuSum, GpuReshape, GpuContiguous,
GpuSubtensor, GpuIncSubtensor,
GpuAdvancedSubtensor1, GpuAdvancedIncSubtensor1,
......
......@@ -33,7 +33,20 @@ def as_cuda_array(obj):
else:
raise TypeError("Don't know how to cast to a CudaNdarray object")
class HostFromGpu(Op):
class GpuOp(Op):
    """Base class for every Theano Op that executes on the GPU.

    Ops inherit from this class (instead of plain ``Op``) so that the CUDA
    driver check is guaranteed to have been executed before any GPU thunk
    is built: ``make_thunk`` lazily initializes the device when nothing
    has selected one yet (see this commit's message).
    """

    def make_thunk(self, node, storage_map, compute_map, no_recycling):
        # `device_number` is an attribute stored on the `use` callable
        # itself; `None` means no GPU device has been initialized yet.
        if theano.sandbox.cuda.use.device_number is None:
            # Initialize the device, but conservatively: do not move any
            # computation or shared float32 variables to the GPU, and do
            # not flip the global cuda-enabled flag — only make sure the
            # driver/device is actually usable.
            theano.sandbox.cuda.use("gpu",
                                    force=True,
                                    default_to_move_computation_to_gpu=False,
                                    move_shared_float32_to_gpu=False,
                                    enable_cuda=False)
        # Delegate the actual thunk construction to the parent Op.
        return super(GpuOp, self).make_thunk(node, storage_map,
                                             compute_map, no_recycling)
class HostFromGpu(GpuOp):
"""
Implement the transfer from gpu to the cpu.
"""
......@@ -65,7 +78,7 @@ class HostFromGpu(Op):
return xshp
host_from_gpu = HostFromGpu()
class GpuFromHost(Op):
class GpuFromHost(GpuOp):
"""
Implement the transfer from cpu to the gpu.
"""
......@@ -98,7 +111,8 @@ class GpuFromHost(Op):
return xshp
gpu_from_host = GpuFromHost()
class GpuElemwise(Op):
class GpuElemwise(GpuOp):
"""
Implement a generic elemwise on the gpu.
"""
......@@ -208,7 +222,7 @@ class GpuElemwise(Op):
def c_code_cache_version(self):
return self.src_generator.cache_version
class GpuDimShuffle(Op):
class GpuDimShuffle(GpuOp):
"""
Implement DimShuffle on the gpu.
"""
......@@ -397,7 +411,7 @@ class GpuDimShuffle(Op):
def c_code_cache_version(self):
return (1,0)
class GpuSum(Op):
class GpuSum(GpuOp):
"""GpuSum is a Reduction along some dimensions by summation.
The dimensions along which to sum is specified by the `reduce_mask` that you pass to the
......@@ -1717,7 +1731,7 @@ class GpuSum(Op):
""" %locals()
return sio.getvalue()
class GpuReshape(tensor.Reshape):
class GpuReshape(tensor.Reshape, GpuOp):
"""
Implement Reshape on the gpu.
"""
......@@ -1733,7 +1747,7 @@ class GpuReshape(tensor.Reshape):
', should be %i' % (len(shp), self.ndim), shp)
out[0] = x.reshape(tuple(shp))
class GpuSubtensor(tensor.Subtensor):
class GpuSubtensor(tensor.Subtensor, GpuOp):
"""
Implement subtensor on the gpu.
"""
......@@ -1764,7 +1778,7 @@ class GpuSubtensor(tensor.Subtensor):
cdata = cdata[0]
out[0] = x.__getitem__(cdata)
class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1):
class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1, GpuOp):
"""
Implement AdvancedSubtensor1 on the gpu.
"""
......@@ -1790,7 +1804,7 @@ class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1):
o[j] = x[i]
out[0] = o
class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1):
class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):
"""
Implement AdvancedIncSubtensor1 on the gpu.
"""
......@@ -1818,7 +1832,7 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1):
# CudaNdarray_Subscript() don't support Advanced slicing.
# so we use the parent version that loop on each indices.
class GpuIncSubtensor(tensor.IncSubtensor):
class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
"""
Implement IncSubtensor on the gpu.
"""
......@@ -1828,7 +1842,7 @@ class GpuIncSubtensor(tensor.IncSubtensor):
rval = tensor.IncSubtensor.make_node(self, x, y, *inputs)
return Apply(self, [x,y]+rval.inputs[2:], [x.type()])
class GpuFlatten(tensor.Flatten):
class GpuFlatten(tensor.Flatten, GpuOp):
"""
Implement Flatten on the gpu.
"""
......@@ -1839,7 +1853,7 @@ class GpuFlatten(tensor.Flatten):
out_type = CudaNdarrayType(broadcastable=host_out_broadcastable)
return Apply(self, [x], [out_type()])
class GpuShape(tensor.Shape):
class GpuShape(tensor.Shape, GpuOp):
"""
Implement Shape on the gpu.
"""
......@@ -1847,7 +1861,7 @@ class GpuShape(tensor.Shape):
return Apply(self, [x], [tensor.lvector()])
gpu_shape = GpuShape()
class GpuJoin(tensor.Join):
class GpuJoin(tensor.Join, GpuOp):
"""
Implement Join on the gpu.
"""
......@@ -1924,7 +1938,7 @@ class GpuJoin(tensor.Join):
gpu_join = GpuJoin()
class GpuAlloc(Op):
class GpuAlloc(GpuOp):
"""
Implement Alloc on the gpu.
"""
......@@ -2023,7 +2037,7 @@ class GpuAlloc(Op):
gpu_alloc = GpuAlloc()
class GpuContiguous(Op):
class GpuContiguous(GpuOp):
"""
Always return a c contiguous output. Copy the input only if it is
not already c contiguous.
......
......@@ -4,8 +4,9 @@ import StringIO, os
import cuda_ndarray.cuda_ndarray as cuda
from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda import GpuOp
class GpuDot22(Op):
class GpuDot22(GpuOp):
"""
Implement dot(2d, 2d) on the gpu.
"""
......@@ -76,7 +77,7 @@ class GpuDot22(Op):
""" % locals()
gpu_dot22 = GpuDot22()
class GpuDot22Scalar(Op):
class GpuDot22Scalar(GpuOp):
"""
Implement dot(2d, 2d) * scalar on the gpu.
"""
......@@ -155,7 +156,7 @@ class GpuDot22Scalar(Op):
""" % locals()
gpu_dot22scalar = GpuDot22Scalar()
class GpuGemm(Op):
class GpuGemm(GpuOp):
"""
implement the gemm on the gpu.
......@@ -257,7 +258,7 @@ class GpuGemm(Op):
gpu_gemm_no_inplace = GpuGemm(inplace=False)
gpu_gemm_inplace = GpuGemm(inplace=True)
class GpuGemv(Op):
class GpuGemv(GpuOp):
"""
implement gemv on the gpu.
......@@ -348,7 +349,7 @@ class GpuGemv(Op):
gpu_gemv_no_inplace = GpuGemv(inplace=False)
gpu_gemv_inplace = GpuGemv(inplace=True)
class GpuGer(Op):
class GpuGer(GpuOp):
"""
implement ger on the gpu.
......@@ -439,7 +440,7 @@ class GpuGer(Op):
gpu_ger_no_inplace = GpuGer(inplace=False)
gpu_ger_inplace = GpuGer(inplace=True)
class GpuOuter(Op):
class GpuOuter(GpuOp):
""" Implement outer on the gpu."""
def make_node(self, x, y):
# we suppose type checking has been done, but make sure.
......@@ -532,7 +533,7 @@ gpu_outer = GpuOuter()
##
# Not really a BLAS operation, but whatever.
#
class GpuConv(Op):
class GpuConv(GpuOp):
"""
Implement the batched and stacked 2d convolution on the gpu.
"""
......@@ -698,7 +699,7 @@ class GpuConv(Op):
"""%sub
class GpuDownsampleFactorMax(Op):
class GpuDownsampleFactorMax(GpuOp):
"""
Implement downsample with max on the gpu.
"""
......@@ -858,7 +859,7 @@ class GpuDownsampleFactorMax(Op):
}
""" % locals()
class GpuDownsampleFactorMaxGrad(Op):
class GpuDownsampleFactorMaxGrad(GpuOp):
"""
Implement the grad of downsample with max on the gpu.
"""
......
......@@ -3,11 +3,12 @@ from theano import tensor, scalar
import StringIO
from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda import GpuOp
from theano.sandbox.cuda.kernel_codegen import nvcc_kernel, inline_reduce_max, inline_reduce_sum, inline_softmax
class GpuCrossentropySoftmaxArgmax1HotWithBias (Op):
class GpuCrossentropySoftmaxArgmax1HotWithBias (GpuOp):
"""
Implement CrossentropySoftmaxArgmax1HotWithBias on the gpu.
"""
......@@ -180,7 +181,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias (Op):
gpu_crossentropy_softmax_argmax_1hot_with_bias = GpuCrossentropySoftmaxArgmax1HotWithBias()
class GpuCrossentropySoftmax1HotWithBiasDx (Op):
class GpuCrossentropySoftmax1HotWithBiasDx (GpuOp):
"""
Implement CrossentropySoftmax1HotWithBiasDx on the gpu.
"""
......@@ -302,7 +303,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx (Op):
gpu_crossentropy_softmax_1hot_with_bias_dx = GpuCrossentropySoftmax1HotWithBiasDx()
class GpuSoftmax (Op):
class GpuSoftmax (GpuOp):
"""
Implement Softmax on the gpu.
"""
......@@ -400,7 +401,7 @@ class GpuSoftmax (Op):
gpu_softmax = GpuSoftmax()
class GpuSoftmaxWithBias (Op):
class GpuSoftmaxWithBias (GpuOp):
"""
Implement SoftmaxWithBias on the gpu.
"""
......
......@@ -10,7 +10,7 @@ __contact__ = "theano-dev@googlegroups.com"
import sys
import numpy
import theano.gof
from theano.sandbox.cuda import CudaNdarrayType
from theano.sandbox.cuda import CudaNdarrayType, GpuOp
from theano.tensor import (get_vector_length, cast, opt)
from theano.compile import optdb
from theano.gof import local_optimizer, Variable
......@@ -19,7 +19,7 @@ from theano.gof import local_optimizer, Variable
config = theano.config
class CURAND_Base(theano.gof.Op):
class CURAND_Base(GpuOp):
""" Base class for a random number generator implemented in CURAND.
The random number generator itself is an opaque reference managed by
......
......@@ -5,7 +5,7 @@ from theano.gof import local_optimizer
from theano.sandbox.cuda import cuda_available
if cuda_available:
from theano.sandbox.cuda import CudaNdarrayType
from theano.sandbox.cuda import CudaNdarrayType, GpuOp
from theano.sandbox.cuda.basic_ops import host_from_gpu, gpu_from_host
from theano.sandbox.cuda.opt import register_opt
......@@ -120,7 +120,7 @@ class MultinomialFromUniform(Op):
""" % locals()
class GpuMultinomialFromUniform(MultinomialFromUniform):
class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp):
"""
The output is transposed compared to MultinomialFromUniform.
We must insert a Transpose op after it.
......
......@@ -5,7 +5,7 @@ from theano.gof import local_optimizer
from theano.sandbox.cuda import cuda_available
if cuda_available:
from theano.sandbox.cuda import CudaNdarrayType
from theano.sandbox.cuda import CudaNdarrayType, GpuOp
from theano.sandbox.cuda.basic_ops import host_from_gpu, gpu_from_host
from theano.sandbox.cuda.opt import register_opt as register_gpu_opt
......@@ -292,7 +292,7 @@ def neibs2images(neibs, neib_shape, original_shape, mode='valid'):
# This is work in progress
class GpuImages2Neibs(Images2Neibs):
class GpuImages2Neibs(Images2Neibs, GpuOp):
def __init__(self, mode='valid'):
if mode not in ['valid', 'wrap_centered']:
raise NotImplementedError("Only the mode valid and wrap_centered"
......
......@@ -20,7 +20,10 @@ import multinomial
from theano.sandbox.cuda import cuda_available, cuda_enabled
if cuda_available:
from theano.sandbox.cuda import CudaNdarrayType, float32_shared_constructor
from theano.sandbox.cuda import (CudaNdarrayType,
float32_shared_constructor,
GpuOp)
def mulmod(a, b, c, m):
r = numpy.int32((numpy.int64(a)*b + c) % m)
......@@ -372,7 +375,7 @@ class mrg_uniform(mrg_uniform_base):
def c_code_cache_version(self):
return (1,)
class GPU_mrg_uniform(mrg_uniform_base):
class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
#GPU VERSION
@classmethod
......
Markdown is supported
0%
You are about to add 0 people to this discussion. Please proceed with caution.
Finish editing this message first!
Register or sign in to post a comment.