提交 9c146bd8 作者: James Bergstra

corrected test_nnet, misc fixes.

上级 b3528941
...@@ -27,7 +27,8 @@ class HostFromGpu(Op): ...@@ -27,7 +27,8 @@ class HostFromGpu(Op):
def perform(self, node, (x,), (z,)): def perform(self, node, (x,), (z,)):
z[0] = numpy.asarray(x) z[0] = numpy.asarray(x)
def grad(self, inputs, (gz,)): def grad(self, inputs, (gz,)):
return [GpuFromHost()(gz)] return gz,
#return [GpuFromHost()(gz)]
host_from_gpu = HostFromGpu() host_from_gpu = HostFromGpu()
class GpuFromHost(Op): class GpuFromHost(Op):
...@@ -44,7 +45,8 @@ class GpuFromHost(Op): ...@@ -44,7 +45,8 @@ class GpuFromHost(Op):
def perform(self, node, (x,), (z,)): def perform(self, node, (x,), (z,)):
z[0] = type_support_filter(numpy.asarray(x, dtype='float32'), tuple([0]*x.ndim), 0) z[0] = type_support_filter(numpy.asarray(x, dtype='float32'), tuple([0]*x.ndim), 0)
def grad(self, inputs, (gz,)): def grad(self, inputs, (gz,)):
return [HostFromGpu()(gz)] return gz,
#return [HostFromGpu()(gz)]
gpu_from_host = GpuFromHost() gpu_from_host = GpuFromHost()
...@@ -256,7 +258,7 @@ class GpuElemwise(Op): ...@@ -256,7 +258,7 @@ class GpuElemwise(Op):
{ {
int threads_per_block = std::min(numEls, (unsigned int)NUM_VECTOR_OP_THREADS_PER_BLOCK); int threads_per_block = std::min(numEls, (unsigned int)NUM_VECTOR_OP_THREADS_PER_BLOCK);
//a ceil would be better here //a ceil would be better here
int n_blocks = std::min(numEls/threads_per_block + 1, (unsigned int)NUM_VECTOR_OP_BLOCKS); int n_blocks = std::min(numEls/threads_per_block + (numEls %% threads_per_block?1:0), (unsigned int)NUM_VECTOR_OP_BLOCKS);
kernel_%(nodename)s<<<n_blocks, threads_per_block>>>(%(kernel_call_args)s); kernel_%(nodename)s<<<n_blocks, threads_per_block>>>(%(kernel_call_args)s);
//std::cerr << "ADDCALL a str" << i0_str[0] << " "<< i0_str[1] << "\\n"; //std::cerr << "ADDCALL a str" << i0_str[0] << " "<< i0_str[1] << "\\n";
//std::cerr << "ADDCALL a data" << i0_data << "\\n"; //std::cerr << "ADDCALL a data" << i0_data << "\\n";
...@@ -498,31 +500,34 @@ class GpuDimShuffle(Op): ...@@ -498,31 +500,34 @@ class GpuDimShuffle(Op):
#alloc an output #alloc an output
print >> sio, """ print >> sio, """
if (cnda_%(res)s) if (cnda_%(res)s && (cnda_%(res)s->nd == %(nd_out)s))
{ {
//TODO: re-use previously-allocated stuff //re-use previously-allocated cnda
Py_DECREF(cnda_%(res)s);
cnda_%(res)s = NULL;
} }
if (NULL == cnda_%(res)s) { else
{
if (cnda_%(res)s)
{
Py_DECREF(cnda_%(res)s);
cnda_%(res)s = NULL;
}
cnda_%(res)s = (CudaNdarray*) CudaNdarray_new_null(); cnda_%(res)s = (CudaNdarray*) CudaNdarray_new_null();
if (NULL == cnda_%(res)s) if (NULL == cnda_%(res)s)
{ {
PyErr_SetString(PyExc_MemoryError, "Failed to allocate result"); PyErr_SetString(PyExc_MemoryError, "Failed to allocate result");
%(fail)s; %(fail)s;
} }
if (CudaNdarray_set_nd(cnda_%(res)s, %(nd_out)s))
{
// err message set
Py_DECREF(cnda_%(res)s);
cnda_%(res)s = NULL;
%(fail)s;
}
} }
""" %locals() """ %locals()
#get the copy / view of the input depending on whether we're doing things inplace or not.
print >> sio, """ print >> sio, """
if (CudaNdarray_set_nd(cnda_%(res)s, %(nd_out)s))
{
// err message set
Py_DECREF(cnda_%(res)s);
cnda_%(res)s = NULL;
%(fail)s;
}
if (CudaNdarray_set_device_data(cnda_%(res)s, CudaNdarray_DEV_DATA(cnda_%(input)s), cnda_%(input)s)) if (CudaNdarray_set_device_data(cnda_%(res)s, CudaNdarray_DEV_DATA(cnda_%(input)s), cnda_%(input)s))
{ {
// err message set // err message set
......
...@@ -24,10 +24,10 @@ def run_nnet(use_gpu): ...@@ -24,10 +24,10 @@ def run_nnet(use_gpu):
v = tcn.shared_constructor(numpy.zeros((n_hid, n_out)), 'c') v = tcn.shared_constructor(numpy.zeros((n_hid, n_out)), 'c')
c = tcn.shared_constructor(numpy.zeros(n_out), 'c') c = tcn.shared_constructor(numpy.zeros(n_out), 'c')
else: else:
w = shared(0.01*(numpy.random.rand(n_in,n_hid)-0.5), 'w') w = shared(numpy.asarray(0.01*(numpy.random.rand(n_in,n_hid)-0.5), dtype='float32'), 'w')
b = shared(numpy.zeros(n_hid), 'b') b = shared(numpy.asarray(numpy.zeros(n_hid), dtype='float32'), 'b')
v = shared(numpy.zeros((n_hid, n_out)), 'c') v = shared(numpy.asarray(numpy.zeros((n_hid, n_out)), dtype='float32'), 'c')
c = shared(numpy.zeros(n_out), 'c') c = shared(numpy.asarray(numpy.zeros(n_out), dtype='float32'), 'c')
x = tensor.fmatrix('x') x = tensor.fmatrix('x')
y = tensor.fmatrix('y') y = tensor.fmatrix('y')
...@@ -54,10 +54,14 @@ def run_nnet(use_gpu): ...@@ -54,10 +54,14 @@ def run_nnet(use_gpu):
lr = numpy.asarray(0.01, dtype='float32') lr = numpy.asarray(0.01, dtype='float32')
for i in xrange(100): for i in xrange(100):
train(xval, yval, lr) rval = train(xval, yval, lr)
mode.print_summary() mode.print_summary()
return rval
def test_nnet_cpu(): def test_nnet_cpu_gpu():
run_nnet(False) numpy.random.seed(23456)
def test_nnet_gpu(): rval_cpu = run_nnet(False)
run_nnet(True) numpy.random.seed(23456)
rval_gpu = run_nnet(True)
assert numpy.allclose(rval_cpu, rval_gpu,rtol=1e-4,atol=1e-6)
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论