提交 3e2b3f10 作者: Frederic

pep8

上级 5e93e6b8
...@@ -5,23 +5,30 @@ import StringIO ...@@ -5,23 +5,30 @@ import StringIO
from theano.sandbox.cuda.type import CudaNdarrayType from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda import GpuOp from theano.sandbox.cuda import GpuOp
from theano.sandbox.cuda.kernel_codegen import nvcc_kernel, inline_reduce_max, inline_reduce_sum, inline_softmax from theano.sandbox.cuda.kernel_codegen import (nvcc_kernel, inline_reduce_max,
inline_reduce_sum,
inline_softmax)
class GpuCrossentropySoftmaxArgmax1HotWithBias (GpuOp): class GpuCrossentropySoftmaxArgmax1HotWithBias (GpuOp):
""" """
Implement CrossentropySoftmaxArgmax1HotWithBias on the gpu. Implement CrossentropySoftmaxArgmax1HotWithBias on the gpu.
""" """
nin=3 nin = 3
nout=3 nout = 3
def __eq__(self, other): def __eq__(self, other):
return type(self) == type(other) return type(self) == type(other)
def __hash__(self): def __hash__(self):
return hash(type(self)) return hash(type(self))
def __str__(self): def __str__(self):
return self.__class__.__name__ return self.__class__.__name__
def make_node(self, x, b, y_idx): def make_node(self, x, b, y_idx):
nll = y_idx.type() #N.B. won't work when we don't cast y_idx to float anymore #N.B. won't work when we don't cast y_idx to float anymore
nll = y_idx.type()
sm = x.type() sm = x.type()
am = y_idx.type() am = y_idx.type()
return Apply(self, [x, b, y_idx], [nll, sm, am]) return Apply(self, [x, b, y_idx], [nll, sm, am])
...@@ -85,7 +92,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias (GpuOp): ...@@ -85,7 +92,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias (GpuOp):
def c_code(self, node, nodename, inp, out, sub): def c_code(self, node, nodename, inp, out, sub):
x, b, y_idx = inp x, b, y_idx = inp
nll, sm, am = out nll, sm, am = out
classname=self.__class__.__name__ classname = self.__class__.__name__
fail = sub['fail'] fail = sub['fail']
sio = StringIO.StringIO() sio = StringIO.StringIO()
print >> sio, """ print >> sio, """
...@@ -106,12 +113,14 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias (GpuOp): ...@@ -106,12 +113,14 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias (GpuOp):
} }
if (CudaNdarray_HOST_DIMS(%(x)s)[0] != CudaNdarray_HOST_DIMS(%(y_idx)s)[0]) if (CudaNdarray_HOST_DIMS(%(x)s)[0] != CudaNdarray_HOST_DIMS(%(y_idx)s)[0])
{ {
PyErr_SetString(PyExc_ValueError, "dimension mismatch in x,y_idx arguments"); PyErr_SetString(PyExc_ValueError,
"dimension mismatch in x,y_idx arguments");
%(fail)s; %(fail)s;
} }
if (CudaNdarray_HOST_DIMS(%(x)s)[1] != CudaNdarray_HOST_DIMS(%(b)s)[0]) if (CudaNdarray_HOST_DIMS(%(x)s)[1] != CudaNdarray_HOST_DIMS(%(b)s)[0])
{ {
PyErr_SetString(PyExc_ValueError, "dimension mismatch in x,b arguments"); PyErr_SetString(PyExc_ValueError,
"dimension mismatch in x,b arguments");
%(fail)s; %(fail)s;
} }
if ((NULL == %(nll)s) //initial condition if ((NULL == %(nll)s) //initial condition
...@@ -132,7 +141,8 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias (GpuOp): ...@@ -132,7 +141,8 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias (GpuOp):
%(sm)s = (CudaNdarray*) CudaNdarray_NewDims(2, CudaNdarray_HOST_DIMS(%(x)s)); %(sm)s = (CudaNdarray*) CudaNdarray_NewDims(2, CudaNdarray_HOST_DIMS(%(x)s));
if(!%(sm)s) if(!%(sm)s)
{ {
PyErr_SetString(PyExc_MemoryError, "failed to alloc sm output"); PyErr_SetString(PyExc_MemoryError,
"failed to alloc sm output");
// no need to decref cnda_nll, the cleanup code should pick it up. // no need to decref cnda_nll, the cleanup code should pick it up.
%(fail)s; %(fail)s;
} }
...@@ -144,7 +154,8 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias (GpuOp): ...@@ -144,7 +154,8 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias (GpuOp):
%(am)s = (CudaNdarray*) CudaNdarray_NewDims(1, CudaNdarray_HOST_DIMS(%(y_idx)s)); %(am)s = (CudaNdarray*) CudaNdarray_NewDims(1, CudaNdarray_HOST_DIMS(%(y_idx)s));
if(!%(am)s) if(!%(am)s)
{ {
PyErr_SetString(PyExc_MemoryError, "failed to alloc am output"); PyErr_SetString(PyExc_MemoryError,
"failed to alloc am output");
// no need to decref nll amd sm, the cleanup code should pick it up. // no need to decref nll amd sm, the cleanup code should pick it up.
%(fail)s; %(fail)s;
} }
...@@ -167,7 +178,9 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias (GpuOp): ...@@ -167,7 +178,9 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias (GpuOp):
cudaError_t err = cudaGetLastError(); cudaError_t err = cudaGetLastError();
if (cudaSuccess != err) if (cudaSuccess != err)
{ {
PyErr_Format(PyExc_RuntimeError, "Cuda error: %(classname)s %(nodename)s: %%s.\\n", cudaGetErrorString(err)); PyErr_Format(PyExc_RuntimeError,
"Cuda error: %(classname)s %(nodename)s: %%s.\\n",
cudaGetErrorString(err));
// no need to decref output vars the cleanup code should pick them up. // no need to decref output vars the cleanup code should pick them up.
%(fail)s; %(fail)s;
} }
...@@ -181,26 +194,33 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias (GpuOp): ...@@ -181,26 +194,33 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias (GpuOp):
gpu_crossentropy_softmax_argmax_1hot_with_bias = GpuCrossentropySoftmaxArgmax1HotWithBias() gpu_crossentropy_softmax_argmax_1hot_with_bias = GpuCrossentropySoftmaxArgmax1HotWithBias()
class GpuCrossentropySoftmax1HotWithBiasDx (GpuOp): class GpuCrossentropySoftmax1HotWithBiasDx (GpuOp):
""" """
Implement CrossentropySoftmax1HotWithBiasDx on the gpu. Implement CrossentropySoftmax1HotWithBiasDx on the gpu.
""" """
nin=3 nin = 3
nout=1 nout = 1
"""Gradient wrt x of the CrossentropySoftmax1Hot Op""" """Gradient wrt x of the CrossentropySoftmax1Hot Op"""
def __init__(self, **kwargs): def __init__(self, **kwargs):
Op.__init__(self,**kwargs) Op.__init__(self,**kwargs)
def __eq__(self, other): def __eq__(self, other):
return type(self) == type(other) return type(self) == type(other)
def __hash__(self): def __hash__(self):
return hash(type(self)) return hash(type(self))
def __str__(self): def __str__(self):
return self.__class__.__name__ return self.__class__.__name__
def make_node(self, dy, sm, y_idx): def make_node(self, dy, sm, y_idx):
return Apply(self, [dy, sm, y_idx],[sm.type()]) return Apply(self, [dy, sm, y_idx], [sm.type()])
def c_code_cache_version(self): def c_code_cache_version(self):
#return () #return ()
return (5,) return (5,)
def c_code(self, node, nodename, inp, out, sub): def c_code(self, node, nodename, inp, out, sub):
dnll, sm, y_idx = inp dnll, sm, y_idx = inp
dx, = out dx, = out
...@@ -221,7 +241,8 @@ class GpuCrossentropySoftmax1HotWithBiasDx (GpuOp): ...@@ -221,7 +241,8 @@ class GpuCrossentropySoftmax1HotWithBiasDx (GpuOp):
} }
if (CudaNdarray_HOST_DIMS(%(dnll)s)[0] != CudaNdarray_HOST_DIMS(%(y_idx)s)[0]) if (CudaNdarray_HOST_DIMS(%(dnll)s)[0] != CudaNdarray_HOST_DIMS(%(y_idx)s)[0])
{ {
PyErr_SetString(PyExc_ValueError, "dnll.shape[0] != y_idx.shape[0]"); PyErr_SetString(PyExc_ValueError,
"dnll.shape[0] != y_idx.shape[0]");
%(fail)s; %(fail)s;
} }
if ((NULL == %(dx)s) if ((NULL == %(dx)s)
...@@ -265,7 +286,10 @@ class GpuCrossentropySoftmax1HotWithBiasDx (GpuOp): ...@@ -265,7 +286,10 @@ class GpuCrossentropySoftmax1HotWithBiasDx (GpuOp):
cudaError_t err = cudaGetLastError(); cudaError_t err = cudaGetLastError();
if( cudaSuccess != err) if( cudaSuccess != err)
{ {
PyErr_Format(PyExc_RuntimeError, "Cuda error: %%s: %%s.\\n", "kCrossEntropySoftmax1HotWithBiasDx_%(nodename)s", cudaGetErrorString(err)); PyErr_Format(PyExc_RuntimeError,
"Cuda error: %%s: %%s.\\n",
"kCrossEntropySoftmax1HotWithBiasDx_%(nodename)s",
cudaGetErrorString(err));
%(fail)s; %(fail)s;
} }
} }
...@@ -305,23 +329,30 @@ class GpuCrossentropySoftmax1HotWithBiasDx (GpuOp): ...@@ -305,23 +329,30 @@ class GpuCrossentropySoftmax1HotWithBiasDx (GpuOp):
gpu_crossentropy_softmax_1hot_with_bias_dx = GpuCrossentropySoftmax1HotWithBiasDx() gpu_crossentropy_softmax_1hot_with_bias_dx = GpuCrossentropySoftmax1HotWithBiasDx()
class GpuSoftmax (GpuOp): class GpuSoftmax (GpuOp):
""" """
Implement Softmax on the gpu. Implement Softmax on the gpu.
""" """
def __eq__(self, other): def __eq__(self, other):
return type(self) == type(other) return type(self) == type(other)
def __hash__(self): def __hash__(self):
return hash(type(self)) return hash(type(self))
def __str__(self): def __str__(self):
return self.__class__.__name__ return self.__class__.__name__
def make_node(self, x): def make_node(self, x):
return Apply(self, [x],[x.type()]) return Apply(self, [x], [x.type()])
def infer_shape(self, node, shape): def infer_shape(self, node, shape):
return shape return shape
def c_code_cache_version(self): def c_code_cache_version(self):
#return () #return ()
return (6,) + inline_softmax.code_version return (6,) + inline_softmax.code_version
def c_code(self, node, nodename, inp, out, sub): def c_code(self, node, nodename, inp, out, sub):
x, = inp x, = inp
z, = out z, = out
...@@ -332,14 +363,15 @@ class GpuSoftmax (GpuOp): ...@@ -332,14 +363,15 @@ class GpuSoftmax (GpuOp):
PyErr_SetString(PyExc_ValueError, "rank error"); PyErr_SetString(PyExc_ValueError, "rank error");
%(fail)s; %(fail)s;
} }
if ((NULL == %(z)s) if ((NULL == %(z)s) ||
|| (CudaNdarray_HOST_DIMS(%(z)s)[0] != CudaNdarray_HOST_DIMS(%(x)s)[0]) (CudaNdarray_HOST_DIMS(%(z)s)[0] != CudaNdarray_HOST_DIMS(%(x)s)[0]) ||
|| (CudaNdarray_HOST_DIMS(%(z)s)[1] != CudaNdarray_HOST_DIMS(%(x)s)[1])) (CudaNdarray_HOST_DIMS(%(z)s)[1] != CudaNdarray_HOST_DIMS(%(x)s)[1]))
{ {
Py_XDECREF(%(z)s); Py_XDECREF(%(z)s);
%(z)s = (CudaNdarray*)CudaNdarray_New(); %(z)s = (CudaNdarray*)CudaNdarray_New();
if ((NULL == %(z)s) if ((NULL == %(z)s)
|| CudaNdarray_alloc_contiguous(%(z)s, 2, CudaNdarray_HOST_DIMS(%(x)s))) || CudaNdarray_alloc_contiguous(%(z)s, 2,
CudaNdarray_HOST_DIMS(%(x)s)))
{ {
Py_XDECREF(%(z)s); Py_XDECREF(%(z)s);
%(z)s = NULL; %(z)s = NULL;
...@@ -354,7 +386,6 @@ class GpuSoftmax (GpuOp): ...@@ -354,7 +386,6 @@ class GpuSoftmax (GpuOp):
kSoftmax_%(nodename)s kSoftmax_%(nodename)s
<<< <<<
// todo: cap these at the card limits, implement loops in kernel
n_blocks, n_blocks,
n_threads, n_threads,
n_shared_bytes n_shared_bytes
...@@ -382,7 +413,7 @@ class GpuSoftmax (GpuOp): ...@@ -382,7 +413,7 @@ class GpuSoftmax (GpuOp):
""" % locals() """ % locals()
def c_support_code_apply(self, node, nodename): def c_support_code_apply(self, node, nodename):
return nvcc_kernel("kSoftmax_%s"%nodename, return nvcc_kernel("kSoftmax_%s" % nodename,
params=['int M', 'int N', params=['int M', 'int N',
'const float * x', 'const int sx0', 'const int sx1', 'const float * x', 'const int sx0', 'const int sx1',
'float * sm', 'const int sm_s0', 'const int sm_s1'], 'float * sm', 'const int sm_s0', 'const int sm_s1'],
...@@ -395,9 +426,11 @@ class GpuSoftmax (GpuOp): ...@@ -395,9 +426,11 @@ class GpuSoftmax (GpuOp):
"buf2[tx] = buf[tx]", "buf2[tx] = buf[tx]",
"}", "}",
"__syncthreads()", "__syncthreads()",
inline_softmax('N', 'buf', 'buf2', 'threadIdx.x', 'blockDim.x'), inline_softmax('N', 'buf', 'buf2',
'threadIdx.x', 'blockDim.x'),
"for (int tx = threadIdx.x; tx< N; tx += blockDim.x){", "for (int tx = threadIdx.x; tx< N; tx += blockDim.x){",
"sm[blockIDX * sm_s0 + tx * sm_s1] = buf[tx]",# This set all value correctly # This set all value correctly
"sm[blockIDX * sm_s0 + tx * sm_s1] = buf[tx]",
"}", "}",
"__syncthreads()", "__syncthreads()",
"}", "}",
...@@ -405,22 +438,29 @@ class GpuSoftmax (GpuOp): ...@@ -405,22 +438,29 @@ class GpuSoftmax (GpuOp):
gpu_softmax = GpuSoftmax() gpu_softmax = GpuSoftmax()
class GpuSoftmaxWithBias (GpuOp): class GpuSoftmaxWithBias (GpuOp):
""" """
Implement SoftmaxWithBias on the gpu. Implement SoftmaxWithBias on the gpu.
""" """
nin = 2 nin = 2
nout = 1 nout = 1
def __eq__(self, other): def __eq__(self, other):
return type(self) == type(other) return type(self) == type(other)
def __hash__(self): def __hash__(self):
return hash(type(self)) return hash(type(self))
def __str__(self): def __str__(self):
return self.__class__.__name__ return self.__class__.__name__
def make_node(self, x, b): def make_node(self, x, b):
return Apply(self, [x, b],[x.type()]) return Apply(self, [x, b], [x.type()])
def infer_shape(self, node, shape): def infer_shape(self, node, shape):
return [shape[0]] return [shape[0]]
def c_code_cache_version(self): def c_code_cache_version(self):
#return () #return ()
return (6,) + inline_softmax.code_version return (6,) + inline_softmax.code_version
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论