提交 c749187c 作者: Arnaud Bergeron

Add checks for copies and reassure observers that ga_int is 32 bits.

上级 8481389b
...@@ -561,6 +561,11 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp): ...@@ -561,6 +561,11 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
else else
{ {
%(o_rstate)s = (CudaNdarray*)CudaNdarray_Copy(%(rstate)s); %(o_rstate)s = (CudaNdarray*)CudaNdarray_Copy(%(rstate)s);
if (!%(o_rstate)s) {
PyErr_SetString(PyExc_RuntimeError, "GPU_mrg_uniform: "
"could not copy rstate");
%(fail)s
}
} }
if (PyArray_NDIM(%(o_rstate)s) != 1) if (PyArray_NDIM(%(o_rstate)s) != 1)
...@@ -607,7 +612,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp): ...@@ -607,7 +612,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
""" % locals() """ % locals()
def c_code_cache_version(self): def c_code_cache_version(self):
return (7,) return (8,)
class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base): class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
...@@ -640,6 +645,11 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base): ...@@ -640,6 +645,11 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
const ga_uint Nsamples, const ga_uint Nsamples,
const ga_uint Nstreams_used) const ga_uint Nstreams_used)
{ {
/*
* The cluda backend makes sure that ga_int corresponds to
* a 32 bit signed type on the target device. It is not a
* variable width type.
*/
const ga_int i7 = 7; const ga_int i7 = 7;
const ga_int i9 = 9; const ga_int i9 = 9;
const ga_int i15 = 15; const ga_int i15 = 15;
...@@ -793,6 +803,9 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base): ...@@ -793,6 +803,9 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
else else
{ {
%(o_rstate)s = pygpu_copy(%(rstate)s, GA_ANY_ORDER); %(o_rstate)s = pygpu_copy(%(rstate)s, GA_ANY_ORDER);
if (!%(o_rstate)s) {
%(fail)s
}
} }
if (PyGpuArray_NDIM(%(o_rstate)s) != 1) if (PyGpuArray_NDIM(%(o_rstate)s) != 1)
...@@ -827,7 +840,7 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base): ...@@ -827,7 +840,7 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
""" % locals() """ % locals()
def c_code_cache_version(self): def c_code_cache_version(self):
return (1, self.GpuKernelBase_version) return (2, self.GpuKernelBase_version)
def guess_n_streams(size, warn=True): def guess_n_streams(size, warn=True):
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论