Commit c1765257 authored by James Bergstra

merge

@@ -185,5 +185,5 @@ class GpuConv(Op):
                 logical_img_shape=self.logical_img_hw,
                 logical_kern_shape=self.logical_kern_hw,
                 kern_align=self.logical_kern_align_top,
-                verbose=1)
+                verbose=0)
@@ -224,8 +224,6 @@ class GpuCrossentropySoftmax1HotWithBiasDx (Op):
             }
         }
         {
-            std::cerr << "LAUNCHING NeW KEWNEL\\n";
             kCrossEntropySoftmax1HotWithBiasDx_%(nodename)s
                 <<<
                     CudaNdarray_HOST_DIMS(cnda_%(dx)s)[0],
@@ -233,6 +231,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx (Op):
                 >>>(
                     CudaNdarray_HOST_DIMS(cnda_%(dx)s)[0],
                     CudaNdarray_HOST_DIMS(cnda_%(dx)s)[1],
+                    CudaNdarray_DEV_DATA(cnda_%(dnll)s),
                     CudaNdarray_HOST_STRIDES(cnda_%(dnll)s)[0],
@@ -265,23 +264,25 @@ class GpuCrossentropySoftmax1HotWithBiasDx (Op):
             const float * y_idx, const int y_idx_s0,
             float * dx)
         {
-            return;
-            for (size_t i = blockIdx.x; i < N; i += gridDim.x)
+            for (int i = blockIdx.x; i < N; i += gridDim.x)
             {
                 float dnll_i = dnll[i * dnll_s0];
                 int y_i = (int)y_idx[i * y_idx_s0];
-                for (size_t j = threadIdx.x; j < K; j += blockDim.x)
+                for (int j = threadIdx.x; j < K; j += blockDim.x)
                 {
                     if (y_i == j)
                     {
-                        dx[i * K + j] = dnll_i * (sm[i * sm_s0 + j * sm_s1]-1);
+                        dx[i * K + j] = dnll_i * (sm[i * sm_s0 + j * sm_s1]-1.0);
                     }
                     else
                     {
                         dx[i * K + j] = dnll_i * sm[i * sm_s0 + j * sm_s1];
                     }
+                    //dx[i * K + j] = dnll_i * sm[i * sm_s0 + j * sm_s1];
+                    //dx[i*K+j] = 0;
                 }
             }
         }
         """ % locals()
@@ -255,7 +255,6 @@ def local_gpu_crossentorpy_softmax_argmax_1hot_with_bias(node):
 @register_opt()
 @local_optimizer([])
 def local_gpu_crossentorpy_softmax_1hot_with_bias_dx(node):
-    print 'REPLACING ', node, '??'
     if isinstance(node.op, tensor.nnet.CrossentropySoftmax1HotWithBiasDx):
         dnll,sm,yidx = node.inputs
         if sm.owner and sm.owner.op == host_from_gpu:
@@ -264,6 +263,5 @@ def local_gpu_crossentorpy_softmax_1hot_with_bias_dx(node):
                 gpu_from_host(dnll),
                 gpu_sm,
                 gpu_from_host(cast(yidx, 'float32')))
-            print 'YEP ', node
             return [host_from_gpu(gpu_dx)]
     return False
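Note: yidx is cast to 'float32' before the transfer because CudaNdarray only stores float32; the kernel casts it back with (int)y_idx[i * y_idx_s0]. The round-trip is exact for class indices below 2**24, which float32 represents exactly:

    import numpy

    y = numpy.asarray([0, 5, 9], dtype='int32')
    y_f = y.astype('float32')                # what gpu_from_host(cast(yidx, 'float32')) ships
    assert (y_f.astype('int32') == y).all()  # exact for labels < 2**24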
@@ -221,7 +221,7 @@ def test_conv_nnet2():
     print rval_cpu[0], rval_gpu[0],rval_cpu[0]-rval_gpu[0]
     assert numpy.allclose(rval_cpu, rval_gpu,rtol=1e-4,atol=1e-4)

-def run_conv_nnet2_classif(shared_fn, isize, ksize):
+def run_conv_nnet2_classif(shared_fn, isize, ksize, n_iter=25):
     n_batch = 60
     shape_img = (n_batch, 1, isize, isize)
@@ -242,7 +242,7 @@ def run_conv_nnet2_classif(shared_fn, isize, ksize):
     b0 = shared_fn(numpy.asarray(numpy.zeros((n_kern,1,1)), dtype='float32'), 'b0')
     w1 = shared_fn(numpy.asarray(0.01*(numpy.random.rand(*shape_kern1)-0.5), dtype='float32'), 'w1')
     b1 = shared_fn(numpy.asarray(numpy.zeros((n_kern1,1,1)), dtype='float32'), 'b1')
-    v = shared_fn(numpy.asarray(numpy.zeros((n_hid, n_out)), dtype='float32'), 'c')
+    v = shared_fn(numpy.asarray(0.01*numpy.random.randn(n_hid, n_out), dtype='float32'), 'c')
     c = shared_fn(numpy.asarray(numpy.zeros(n_out), dtype='float32'), 'c')
     x = tensor.Tensor(dtype='float32', broadcastable=(0,0,0,0))('x')
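Note: initializing v with 0.01*randn instead of zeros matters here: with v identically zero, the gradient flowing back through the output layer is v dotted with the output delta, i.e. zero, so the conv layers below would receive no learning signal. A quick numpy check of that reasoning (shapes illustrative):

    import numpy

    dout = numpy.random.rand(3).astype('float32')    # gradient at the output layer
    v_zero = numpy.zeros((4, 3), dtype='float32')
    assert (v_zero.dot(dout) == 0).all()             # zero init blocks backprop
    v_rand = (0.01 * numpy.random.randn(4, 3)).astype('float32')
    assert v_rand.dot(dout).any()                    # random init lets gradient through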
@@ -267,15 +267,18 @@ def run_conv_nnet2_classif(shared_fn, isize, ksize):
     print 'building pfunc ...'
     train = pfunc([x,y,lr], [loss], mode=mode, updates=[(p, p-g) for p,g in zip(params, gparams)])
-    for i, n in enumerate(train.maker.env.toposort()):
-        print i, n
+    if theano.compile.mode.default_mode == 'PROFILE_MODE':
+        for i, n in enumerate(train.maker.env.toposort()):
+            print i, n
     xval = numpy.asarray(numpy.random.rand(*shape_img), dtype='float32')
-    yval = numpy.asarray(numpy.random.rand(n_batch,n_out), dtype='int32')#FRED: THIS DON'T WORK. THIS SET YVAL TO ALL ZERO!
-    lr = numpy.asarray(0.01, dtype='float32')
+    yval = numpy.asarray(numpy.random.rand(n_batch,n_out), dtype='float32')
+    lr = numpy.asarray(0.001, dtype='float32')
-    for i in xrange(10):
+    for i in xrange(n_iter):
         rval = train(xval, yval, lr)
+        if i % 10 == 0:
+            print 'rval', rval
     print_mode(mode)
     return rval
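Note on the yval fix: numpy.random.rand draws from [0, 1), so asarray(..., dtype='int32') truncates every entry to 0, which is exactly what the removed FRED comment complains about. A small demonstration (shape illustrative):

    import numpy

    yval_old = numpy.asarray(numpy.random.rand(60, 10), dtype='int32')
    assert (yval_old == 0).all()    # the old labels were all zero
    yval_new = numpy.asarray(numpy.random.rand(60, 10), dtype='float32')
    assert yval_new.any()           # float32 targets keep the random values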