提交 c1765257 作者: James Bergstra

merge

...@@ -185,5 +185,5 @@ class GpuConv(Op): ...@@ -185,5 +185,5 @@ class GpuConv(Op):
logical_img_shape=self.logical_img_hw, logical_img_shape=self.logical_img_hw,
logical_kern_shape=self.logical_kern_hw, logical_kern_shape=self.logical_kern_hw,
kern_align=self.logical_kern_align_top, kern_align=self.logical_kern_align_top,
verbose=1) verbose=0)
...@@ -224,8 +224,6 @@ class GpuCrossentropySoftmax1HotWithBiasDx (Op): ...@@ -224,8 +224,6 @@ class GpuCrossentropySoftmax1HotWithBiasDx (Op):
} }
} }
{ {
std::cerr << "LAUNCHING NeW KEWNEL\\n";
kCrossEntropySoftmax1HotWithBiasDx_%(nodename)s kCrossEntropySoftmax1HotWithBiasDx_%(nodename)s
<<< <<<
CudaNdarray_HOST_DIMS(cnda_%(dx)s)[0], CudaNdarray_HOST_DIMS(cnda_%(dx)s)[0],
...@@ -233,6 +231,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx (Op): ...@@ -233,6 +231,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx (Op):
>>>( >>>(
CudaNdarray_HOST_DIMS(cnda_%(dx)s)[0], CudaNdarray_HOST_DIMS(cnda_%(dx)s)[0],
CudaNdarray_HOST_DIMS(cnda_%(dx)s)[1], CudaNdarray_HOST_DIMS(cnda_%(dx)s)[1],
CudaNdarray_DEV_DATA(cnda_%(dnll)s), CudaNdarray_DEV_DATA(cnda_%(dnll)s),
CudaNdarray_HOST_STRIDES(cnda_%(dnll)s)[0], CudaNdarray_HOST_STRIDES(cnda_%(dnll)s)[0],
...@@ -265,23 +264,25 @@ class GpuCrossentropySoftmax1HotWithBiasDx (Op): ...@@ -265,23 +264,25 @@ class GpuCrossentropySoftmax1HotWithBiasDx (Op):
const float * y_idx, const int y_idx_s0, const float * y_idx, const int y_idx_s0,
float * dx) float * dx)
{ {
return; for (int i = blockIdx.x; i < N; i += gridDim.x)
for (size_t i = blockIdx.x; i < N; i += gridDim.x)
{ {
float dnll_i = dnll[i * dnll_s0]; float dnll_i = dnll[i * dnll_s0];
int y_i = (int)y_idx[i * y_idx_s0]; int y_i = (int)y_idx[i * y_idx_s0];
for (size_t j = threadIdx.x; j < K; j += blockDim.x) for (int j = threadIdx.x; j < K; j += blockDim.x)
{ {
if (y_i == j) if (y_i == j)
{ {
dx[i * K + j] = dnll_i * (sm[i * sm_s0 + j * sm_s1]-1); dx[i * K + j] = dnll_i * (sm[i * sm_s0 + j * sm_s1]-1.0);
} }
else else
{ {
dx[i * K + j] = dnll_i * sm[i * sm_s0 + j * sm_s1]; dx[i * K + j] = dnll_i * sm[i * sm_s0 + j * sm_s1];
} }
//dx[i * K + j] = dnll_i * sm[i * sm_s0 + j * sm_s1];
//dx[i*K+j] = 0;
} }
} }
} }
""" % locals() """ % locals()
...@@ -255,7 +255,6 @@ def local_gpu_crossentorpy_softmax_argmax_1hot_with_bias(node): ...@@ -255,7 +255,6 @@ def local_gpu_crossentorpy_softmax_argmax_1hot_with_bias(node):
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([])
def local_gpu_crossentorpy_softmax_1hot_with_bias_dx(node): def local_gpu_crossentorpy_softmax_1hot_with_bias_dx(node):
print 'REPLACING ', node, '??'
if isinstance(node.op, tensor.nnet.CrossentropySoftmax1HotWithBiasDx): if isinstance(node.op, tensor.nnet.CrossentropySoftmax1HotWithBiasDx):
dnll,sm,yidx = node.inputs dnll,sm,yidx = node.inputs
if sm.owner and sm.owner.op == host_from_gpu: if sm.owner and sm.owner.op == host_from_gpu:
...@@ -264,6 +263,5 @@ def local_gpu_crossentorpy_softmax_1hot_with_bias_dx(node): ...@@ -264,6 +263,5 @@ def local_gpu_crossentorpy_softmax_1hot_with_bias_dx(node):
gpu_from_host(dnll), gpu_from_host(dnll),
gpu_sm, gpu_sm,
gpu_from_host(cast(yidx, 'float32'))) gpu_from_host(cast(yidx, 'float32')))
print 'YEP ', node
return [host_from_gpu(gpu_dx)] return [host_from_gpu(gpu_dx)]
return False return False
...@@ -221,7 +221,7 @@ def test_conv_nnet2(): ...@@ -221,7 +221,7 @@ def test_conv_nnet2():
print rval_cpu[0], rval_gpu[0],rval_cpu[0]-rval_gpu[0] print rval_cpu[0], rval_gpu[0],rval_cpu[0]-rval_gpu[0]
assert numpy.allclose(rval_cpu, rval_gpu,rtol=1e-4,atol=1e-4) assert numpy.allclose(rval_cpu, rval_gpu,rtol=1e-4,atol=1e-4)
def run_conv_nnet2_classif(shared_fn, isize, ksize): def run_conv_nnet2_classif(shared_fn, isize, ksize, n_iter=25):
n_batch = 60 n_batch = 60
shape_img = (n_batch, 1, isize, isize) shape_img = (n_batch, 1, isize, isize)
...@@ -242,7 +242,7 @@ def run_conv_nnet2_classif(shared_fn, isize, ksize): ...@@ -242,7 +242,7 @@ def run_conv_nnet2_classif(shared_fn, isize, ksize):
b0 = shared_fn(numpy.asarray(numpy.zeros((n_kern,1,1)), dtype='float32'), 'b0') b0 = shared_fn(numpy.asarray(numpy.zeros((n_kern,1,1)), dtype='float32'), 'b0')
w1 = shared_fn(numpy.asarray(0.01*(numpy.random.rand(*shape_kern1)-0.5), dtype='float32'), 'w1') w1 = shared_fn(numpy.asarray(0.01*(numpy.random.rand(*shape_kern1)-0.5), dtype='float32'), 'w1')
b1 = shared_fn(numpy.asarray(numpy.zeros((n_kern1,1,1)), dtype='float32'), 'b1') b1 = shared_fn(numpy.asarray(numpy.zeros((n_kern1,1,1)), dtype='float32'), 'b1')
v = shared_fn(numpy.asarray(numpy.zeros((n_hid, n_out)), dtype='float32'), 'c') v = shared_fn(numpy.asarray(0.01*numpy.random.randn(n_hid, n_out), dtype='float32'), 'c')
c = shared_fn(numpy.asarray(numpy.zeros(n_out), dtype='float32'), 'c') c = shared_fn(numpy.asarray(numpy.zeros(n_out), dtype='float32'), 'c')
x = tensor.Tensor(dtype='float32', broadcastable=(0,0,0,0))('x') x = tensor.Tensor(dtype='float32', broadcastable=(0,0,0,0))('x')
...@@ -267,15 +267,18 @@ def run_conv_nnet2_classif(shared_fn, isize, ksize): ...@@ -267,15 +267,18 @@ def run_conv_nnet2_classif(shared_fn, isize, ksize):
print 'building pfunc ...' print 'building pfunc ...'
train = pfunc([x,y,lr], [loss], mode=mode, updates=[(p, p-g) for p,g in zip(params, gparams)]) train = pfunc([x,y,lr], [loss], mode=mode, updates=[(p, p-g) for p,g in zip(params, gparams)])
for i, n in enumerate(train.maker.env.toposort()): if theano.compile.mode.default_mode == 'PROFILE_MODE':
print i, n for i, n in enumerate(train.maker.env.toposort()):
print i, n
xval = numpy.asarray(numpy.random.rand(*shape_img), dtype='float32') xval = numpy.asarray(numpy.random.rand(*shape_img), dtype='float32')
yval = numpy.asarray(numpy.random.rand(n_batch,n_out), dtype='int32')#FRED: THIS DON'T WORK. THIS SET YVAL TO ALL ZERO! yval = numpy.asarray(numpy.random.rand(n_batch,n_out), dtype='float32')
lr = numpy.asarray(0.01, dtype='float32') lr = numpy.asarray(0.001, dtype='float32')
for i in xrange(10): for i in xrange(n_iter):
rval = train(xval, yval, lr) rval = train(xval, yval, lr)
if i % 10 == 0:
print 'rval', rval
print_mode(mode) print_mode(mode)
return rval return rval
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论