提交 0f63d704 作者: Pascal Lamblin

Fix GpuDot22{,Scalar} with unsupported output strides

上级 337fafa0
...@@ -30,7 +30,7 @@ class GpuDot22(GpuOp): ...@@ -30,7 +30,7 @@ class GpuDot22(GpuOp):
return Apply(self, [x, y], [otype()]) return Apply(self, [x, y], [otype()])
def c_code_cache_version(self): def c_code_cache_version(self):
return (1, 1) return (1, 2)
def c_code(self, node, nodename, inputs, outputs, sub): def c_code(self, node, nodename, inputs, outputs, sub):
x, y = inputs x, y = inputs
...@@ -51,9 +51,14 @@ class GpuDot22(GpuOp): ...@@ -51,9 +51,14 @@ class GpuDot22(GpuOp):
|| (CudaNdarray_HOST_DIMS(%(z)s)[0] != || (CudaNdarray_HOST_DIMS(%(z)s)[0] !=
CudaNdarray_HOST_DIMS(%(x)s)[0]) CudaNdarray_HOST_DIMS(%(x)s)[0])
|| (CudaNdarray_HOST_DIMS(%(z)s)[1] != || (CudaNdarray_HOST_DIMS(%(z)s)[1] !=
CudaNdarray_HOST_DIMS(%(y)s)[1])) CudaNdarray_HOST_DIMS(%(y)s)[1])
|| (CudaNdarray_HOST_STRIDES(%(z)s)[0] < 0)
|| (CudaNdarray_HOST_STRIDES(%(z)s)[1] < 0)
|| ((CudaNdarray_HOST_DIMS(%(z)s)[0] > 1)
&& (CudaNdarray_HOST_STRIDES(%(z)s)[0] != 1)
&& (CudaNdarray_HOST_DIMS(%(z)s)[1] > 1)
&& (CudaNdarray_HOST_STRIDES(%(z)s)[1] != 1)))
{ {
//if (%(z)s) Py_DECREF(%(z)s);
Py_XDECREF(%(z)s); Py_XDECREF(%(z)s);
npy_intp dims[2]; npy_intp dims[2];
dims[0] = CudaNdarray_HOST_DIMS(%(x)s)[0]; dims[0] = CudaNdarray_HOST_DIMS(%(x)s)[0];
...@@ -108,7 +113,7 @@ class GpuDot22Scalar(GpuOp): ...@@ -108,7 +113,7 @@ class GpuDot22Scalar(GpuOp):
return Apply(self, [x, y, a], [otype()]) return Apply(self, [x, y, a], [otype()])
def c_code_cache_version(self): def c_code_cache_version(self):
return (1, 1) return (1, 2)
def c_code(self, node, name, inputs, outputs, sub): def c_code(self, node, name, inputs, outputs, sub):
x, y, a = inputs x, y, a = inputs
...@@ -135,7 +140,13 @@ class GpuDot22Scalar(GpuOp): ...@@ -135,7 +140,13 @@ class GpuDot22Scalar(GpuOp):
(CudaNdarray_HOST_DIMS(%(z)s)[0] != (CudaNdarray_HOST_DIMS(%(z)s)[0] !=
CudaNdarray_HOST_DIMS(%(x)s)[0]) || CudaNdarray_HOST_DIMS(%(x)s)[0]) ||
(CudaNdarray_HOST_DIMS(%(z)s)[1] != (CudaNdarray_HOST_DIMS(%(z)s)[1] !=
CudaNdarray_HOST_DIMS(%(y)s)[1])) CudaNdarray_HOST_DIMS(%(y)s)[1])
|| (CudaNdarray_HOST_STRIDES(%(z)s)[0] < 0)
|| (CudaNdarray_HOST_STRIDES(%(z)s)[1] < 0)
|| ((CudaNdarray_HOST_DIMS(%(z)s)[0] > 1)
&& (CudaNdarray_HOST_STRIDES(%(z)s)[0] != 1)
&& (CudaNdarray_HOST_DIMS(%(z)s)[1] > 1)
&& (CudaNdarray_HOST_STRIDES(%(z)s)[1] != 1)))
{ {
//if (%(z)s) Py_DECREF(%(z)s); //if (%(z)s) Py_DECREF(%(z)s);
Py_XDECREF(%(z)s); Py_XDECREF(%(z)s);
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论