提交 653b51b5 authored 作者: Frederic's avatar Frederic

pep8

上级 7fcbb026
...@@ -685,15 +685,19 @@ Modify and execute to work for a matrix of shape (20, 10).
class PyCUDADoubleOp(theano.Op):
    def __eq__(self, other):
        return type(self) == type(other)

    def __hash__(self):
        return hash(type(self))

    def __str__(self):
        return self.__class__.__name__

    def make_node(self, inp):
        inp = cuda.basic_ops.gpu_contiguous(
            cuda.basic_ops.as_cuda_ndarray_variable(inp))
        assert inp.dtype == "float32"
        return theano.Apply(self, [inp], [inp.type()])

    def make_thunk(self, node, storage_map, _, _2):
        mod = SourceModule("""
    __global__ void my_fct(float * i0, float * o0, int size) {
...@@ -703,15 +707,16 @@ Modify and execute to work for a matrix of shape (20, 10).
        }
    }""")
        pycuda_fct = mod.get_function("my_fct")
        inputs = [storage_map[v] for v in node.inputs]
        outputs = [storage_map[v] for v in node.outputs]

        def thunk():
            z = outputs[0]
            if z[0] is None or z[0].shape != inputs[0][0].shape:
                z[0] = cuda.CudaNdarray.zeros(inputs[0][0].shape)
            grid = (int(numpy.ceil(inputs[0][0].size / 512.)), 1)
            pycuda_fct(inputs[0][0], z[0], numpy.intc(inputs[0][0].size),
                       block=(512, 1, 1), grid=grid)
        return thunk
...@@ -719,7 +724,7 @@ Use this code to test it:
>>> x = theano.tensor.fmatrix()
>>> f = theano.function([x], PyCUDADoubleOp()(x))
>>> xv = numpy.ones((4, 5), dtype="float32")
>>> assert numpy.allclose(f(xv), xv*2)
>>> print numpy.asarray(f(xv))
......
...@@ -9,7 +9,7 @@ if not theano.misc.pycuda_init.pycuda_available:
                   " with pycuda code.")
import theano.sandbox.cuda as cuda_ndarray
if not cuda_ndarray.cuda_available:
    from nose.plugins.skip import SkipTest
    raise SkipTest('Optional package cuda disabled')
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论