提交 dcfe53fd 作者: Benjamin Scellier

file theano/misc/pycuda_example.py

上级 bad34f33
...@@ -22,7 +22,7 @@ TheanoElementwiseKernel. ...@@ -22,7 +22,7 @@ TheanoElementwiseKernel.
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
from itertools import chain from itertools import chain
import numpy import numpy as np
import theano import theano
from six.moves import xrange from six.moves import xrange
...@@ -257,13 +257,13 @@ class PycudaElemwiseSourceModuleOp(GpuOp): ...@@ -257,13 +257,13 @@ class PycudaElemwiseSourceModuleOp(GpuOp):
" inputs don't have the same shape!") " inputs don't have the same shape!")
if inputs[0].size > 512: if inputs[0].size > 512:
grid = (int(numpy.ceil(inputs[0].size / 512.)), 1) grid = (int(np.ceil(inputs[0].size / 512.)), 1)
block = (512, 1, 1) block = (512, 1, 1)
else: else:
grid = (1, 1) grid = (1, 1)
block = (inputs[0].shape[0], inputs[0].shape[1], 1) block = (inputs[0].shape[0], inputs[0].shape[1], 1)
self.pycuda_fct(inputs[0], inputs[1], z[0], self.pycuda_fct(inputs[0], inputs[1], z[0],
numpy.intc(inputs[1].size), block=block, grid=grid) np.intc(inputs[1].size), block=block, grid=grid)
class PycudaElemwiseSourceModuleMakeThunkOp(Op): class PycudaElemwiseSourceModuleMakeThunkOp(Op):
...@@ -349,13 +349,13 @@ class PycudaElemwiseSourceModuleMakeThunkOp(Op): ...@@ -349,13 +349,13 @@ class PycudaElemwiseSourceModuleMakeThunkOp(Op):
" inputs don't have the same shape!") " inputs don't have the same shape!")
if inputs[0][0].size > 512: if inputs[0][0].size > 512:
grid = (int(numpy.ceil(inputs[0][0].size / 512.)), 1) grid = (int(np.ceil(inputs[0][0].size / 512.)), 1)
block = (512, 1, 1) block = (512, 1, 1)
else: else:
grid = (1, 1) grid = (1, 1)
block = (inputs[0][0].shape[0], inputs[0][0].shape[1], 1) block = (inputs[0][0].shape[0], inputs[0][0].shape[1], 1)
pycuda_fct(inputs[0][0], inputs[1][0], z[0], pycuda_fct(inputs[0][0], inputs[1][0], z[0],
numpy.intc(inputs[1][0].size), block=block, np.intc(inputs[1][0].size), block=block,
grid=grid) grid=grid)
thunk.inputs = inputs thunk.inputs = inputs
thunk.outputs = outputs thunk.outputs = outputs
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论