提交 a1120adf authored 作者: Frederic's avatar Frederic

Fix compilation crash just introduced. Refactor to make sure row_max

is available outside the inner loop as well, for the cross-entropy function. This also removes duplicate code.
上级 b3c09290
...@@ -171,9 +171,7 @@ class SoftmaxWithBias(gof.Op): ...@@ -171,9 +171,7 @@ class SoftmaxWithBias(gof.Op):
const dtype_%(x)s* __restrict__ x_i = (dtype_%(x)s*)(PyArray_BYTES(%(x)s) + PyArray_STRIDES(%(x)s)[0] * i); const dtype_%(x)s* __restrict__ x_i = (dtype_%(x)s*)(PyArray_BYTES(%(x)s) + PyArray_STRIDES(%(x)s)[0] * i);
const dtype_%(b)s* __restrict__ b_i = (dtype_%(b)s*)(PyArray_BYTES(%(b)s)); const dtype_%(b)s* __restrict__ b_i = (dtype_%(b)s*)(PyArray_BYTES(%(b)s));
dtype_%(sm) s* __restrict__ sm_i = (dtype_%(sm)s*)(PyArray_BYTES(%(sm)s) + PyArray_STRIDES(%(sm)s)[0] * i); dtype_%(sm) s* __restrict__ sm_i = (dtype_%(sm)s*)(PyArray_BYTES(%(sm)s) + PyArray_STRIDES(%(sm)s)[0] * i);
"""
inside_row_loop = """
npy_intp Sx = PyArray_STRIDES(%(x)s)[1]/sizeof(dtype_%(x)s); npy_intp Sx = PyArray_STRIDES(%(x)s)[1]/sizeof(dtype_%(x)s);
npy_intp Sb = PyArray_STRIDES(%(b)s)[0]/sizeof(dtype_%(b)s); npy_intp Sb = PyArray_STRIDES(%(b)s)[0]/sizeof(dtype_%(b)s);
npy_intp Ssm = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s); npy_intp Ssm = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s);
...@@ -190,6 +188,9 @@ class SoftmaxWithBias(gof.Op): ...@@ -190,6 +188,9 @@ class SoftmaxWithBias(gof.Op):
row_max = (row_ij > row_max) ? row_ij : row_max; row_max = (row_ij > row_max) ? row_ij : row_max;
} }
"""
inside_row_loop = """
for (j = 0; j < Nx[1]; ++j) for (j = 0; j < Nx[1]; ++j)
{ {
dtype_%(sm)s row_ij = x_i[j * Sx] + b_i[j * Sb]; dtype_%(sm)s row_ij = x_i[j * Sx] + b_i[j * Sb];
...@@ -214,18 +215,6 @@ class SoftmaxWithBias(gof.Op): ...@@ -214,18 +215,6 @@ class SoftmaxWithBias(gof.Op):
vec_exp = theano.scalar.exp.c_code_contiguous_raw(dtype, vec_exp = theano.scalar.exp.c_code_contiguous_raw(dtype,
"Nx[1]", "sm_i", "sm_i") "Nx[1]", "sm_i", "sm_i")
inside_row_loop_contig = """ inside_row_loop_contig = """
size_t row_max_j=0;
dtype_%%(sm)s row_max = x_i[0] + b_i[0];
//std::cout << "0 " << row_max << "\\n";
// Get the maximum value of the row
for (j = 1; j < Nx[1]; ++j)
{
dtype_%%(sm)s row_ij = x_i[j * Sx] + b_i[j * Sb];
//std::cout << "1 " << row_ij << "\\n";
row_max_j = (row_ij > row_max) ? j : row_max_j;
row_max = (row_ij > row_max) ? row_ij : row_max;
}
for (j = 0; j < Nx[1]; ++j) for (j = 0; j < Nx[1]; ++j)
{ {
dtype_%%(sm)s row_ij = x_i[j * Sx] + b_i[j * Sb]; dtype_%%(sm)s row_ij = x_i[j * Sx] + b_i[j * Sb];
...@@ -272,7 +261,7 @@ class SoftmaxWithBias(gof.Op): ...@@ -272,7 +261,7 @@ class SoftmaxWithBias(gof.Op):
@staticmethod @staticmethod
def c_code_cache_version(): def c_code_cache_version():
return (7,) return (8,)
softmax_with_bias = SoftmaxWithBias() softmax_with_bias = SoftmaxWithBias()
...@@ -494,9 +483,7 @@ class Softmax(gof.Op): ...@@ -494,9 +483,7 @@ class Softmax(gof.Op):
const dtype_%(x)s* __restrict__ x_i = (dtype_%(x)s*)(PyArray_BYTES(%(x)s) + PyArray_STRIDES(%(x)s)[0] * i); const dtype_%(x)s* __restrict__ x_i = (dtype_%(x)s*)(PyArray_BYTES(%(x)s) + PyArray_STRIDES(%(x)s)[0] * i);
dtype_%(sm) s* __restrict__ sm_i = (dtype_%(sm)s*)(PyArray_BYTES(%(sm)s) + PyArray_STRIDES(%(sm)s)[0] * i); dtype_%(sm) s* __restrict__ sm_i = (dtype_%(sm)s*)(PyArray_BYTES(%(sm)s) + PyArray_STRIDES(%(sm)s)[0] * i);
"""
inside_row_loop = """
size_t row_max_j=0; size_t row_max_j=0;
dtype_%(sm)s row_max = x_i[0]; dtype_%(sm)s row_max = x_i[0];
//std::cout << "0 " << row_max << "\\n"; //std::cout << "0 " << row_max << "\\n";
...@@ -509,6 +496,9 @@ class Softmax(gof.Op): ...@@ -509,6 +496,9 @@ class Softmax(gof.Op):
row_max = (row_ij > row_max) ? row_ij : row_max; row_max = (row_ij > row_max) ? row_ij : row_max;
} }
"""
inside_row_loop = """
for (j = 0; j < Nx[1]; ++j) for (j = 0; j < Nx[1]; ++j)
{ {
dtype_%(sm)s row_ij = x_i[j * Sx1] ; dtype_%(sm)s row_ij = x_i[j * Sx1] ;
...@@ -532,18 +522,6 @@ class Softmax(gof.Op): ...@@ -532,18 +522,6 @@ class Softmax(gof.Op):
vec_exp = theano.scalar.exp.c_code_contiguous_raw(dtype, vec_exp = theano.scalar.exp.c_code_contiguous_raw(dtype,
"Nx[1]", "sm_i", "sm_i") "Nx[1]", "sm_i", "sm_i")
inside_row_loop_contig = """ inside_row_loop_contig = """
size_t row_max_j=0;
dtype_%%(sm)s row_max = x_i[0];
//std::cout << "0 " << row_max << "\\n";
// Get the maximum value of the row
for (j = 1; j < Nx[1]; ++j)
{
dtype_%%(sm)s row_ij = x_i[j * Sx1] ;
//std::cout << "1 " << row_ij << "\\n";
row_max_j = (row_ij > row_max) ? j : row_max_j;
row_max = (row_ij > row_max) ? row_ij : row_max;
}
for (j = 0; j < Nx[1]; ++j) for (j = 0; j < Nx[1]; ++j)
{ {
sm_i[j * Ssm1] = x_i[j * Sx1] - row_max; sm_i[j * Ssm1] = x_i[j * Sx1] - row_max;
...@@ -587,7 +565,7 @@ class Softmax(gof.Op): ...@@ -587,7 +565,7 @@ class Softmax(gof.Op):
@staticmethod @staticmethod
def c_code_cache_version(): def c_code_cache_version():
return (2,) return (3,)
softmax = Softmax() softmax = Softmax()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论