提交 b1013380 authored 作者: Frederic's avatar Frederic

Make the PyCUDA example check that the preallocated output is C-contiguous.

上级 54926a4b
...@@ -251,7 +251,9 @@ class PycudaElemwiseSourceModuleOp(GpuOp): ...@@ -251,7 +251,9 @@ class PycudaElemwiseSourceModuleOp(GpuOp):
#TODO support broadcast! #TODO support broadcast!
#TODO assert all input have the same shape #TODO assert all input have the same shape
z, = out z, = out
if z[0] is None or z[0].shape != inputs[0].shape: if (z[0] is None or
z[0].shape != inputs[0].shape or
not z[0].is_c_contiguous()):
z[0] = theano.sandbox.cuda.CudaNdarray.zeros(inputs[0].shape) z[0] = theano.sandbox.cuda.CudaNdarray.zeros(inputs[0].shape)
if inputs[0].shape != inputs[1].shape: if inputs[0].shape != inputs[1].shape:
raise TypeError("PycudaElemwiseSourceModuleOp:" raise TypeError("PycudaElemwiseSourceModuleOp:"
...@@ -339,7 +341,9 @@ class PycudaElemwiseSourceModuleMakeThunkOp(Op): ...@@ -339,7 +341,9 @@ class PycudaElemwiseSourceModuleMakeThunkOp(Op):
def thunk(): def thunk():
z = outputs[0] z = outputs[0]
if z[0] is None or z[0].shape != inputs[0][0].shape: if (z[0] is None or
z[0].shape != inputs[0][0].shape or
not z[0].is_c_contiguous()):
z[0] = theano.sandbox.cuda.CudaNdarray.zeros( z[0] = theano.sandbox.cuda.CudaNdarray.zeros(
inputs[0][0].shape) inputs[0][0].shape)
if inputs[0][0].shape != inputs[1][0].shape: if inputs[0][0].shape != inputs[1][0].shape:
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论