Commit c3f14dd4, authored by abergeron

Merge pull request #2812 from nouiz/mixed

Mixed: more error detection, clean up, optimization
......@@ -421,7 +421,7 @@ def pfunc(params, outputs=None, mode=None, updates=None, givens=None,
if profile is None:
profile = config.profile
# profile -> True or False
if profile == True:
if profile is True:
profile = ProfileStats(message=name)
# profile -> object
if type(profile) == str:
......
......@@ -635,7 +635,8 @@ class ModuleCache(object):
self.stats[0] += 1
return self.module_from_name[name]
def refresh(self, age_thresh_use=None, delete_if_problem=False, cleanup=True):
def refresh(self, age_thresh_use=None, delete_if_problem=False,
cleanup=True):
"""Update cache data by walking the cache directory structure.
Load key.pkl files that have not been loaded yet.
......
......@@ -203,7 +203,8 @@ def version():
if version.v is None:
f = theano.function([], DnnVersion()(),
theano.Mode(optimizer=None))
theano.Mode(optimizer=None),
profile=False)
version.v = f()
return version.v
version.v = None
......
......@@ -247,7 +247,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuOp):
def c_code_cache_version(self):
# return ()
return (7,)
return (8,)
def c_code(self, node, nodename, inp, out, sub):
dnll, sm, y_idx = inp
......@@ -288,6 +288,13 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuOp):
"dnll.shape[0] != y_idx.shape[0]");
%(fail)s;
}
if (CudaNdarray_HOST_DIMS(%(sm)s)[0] !=
CudaNdarray_HOST_DIMS(%(y_idx)s)[0])
{
PyErr_SetString(PyExc_ValueError,
"sm.shape[0] != y_idx.shape[0]");
%(fail)s;
}
if ((NULL == %(dx)s)
|| (CudaNdarray_HOST_DIMS(%(dx)s)[0] !=
CudaNdarray_HOST_DIMS(%(sm)s)[0])
......
......@@ -20,6 +20,7 @@ from theano.gof.opt import LocalMetaOptimizer
from theano.sandbox.cuda.basic_ops import (
gpu_eye, gpu_contiguous,
gpu_from_host, host_from_gpu, GpuFromHost, HostFromGpu,
GpuContiguous,
GpuElemwise, GpuDimShuffle, GpuReshape, GpuCAReduce, GpuFlatten,
GpuSubtensor, GpuAdvancedSubtensor1,
GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1_dev20,
......@@ -1997,6 +1998,19 @@ def local_gpualloc_memset_0(node):
return [new_out]
@register_opt()
@local_optimizer([GpuContiguous])
def local_gpu_contiguous_gpu_contiguous(node):
    """
    gpu_contiguous(gpu_contiguous(x)) -> gpu_contiguous(x)
    """
    # Only rewrite applications of GpuContiguous itself.
    if not isinstance(node.op, GpuContiguous):
        return
    inner = node.inputs[0]
    producer = inner.owner
    # If the input is already the output of a GpuContiguous, the outer
    # copy is redundant: reuse the inner result directly.
    if producer is not None and isinstance(producer.op, GpuContiguous):
        return [inner]
@register_opt()
@local_optimizer([gpu_from_host, tensor.Eye])
def local_gpu_eye(node):
......
......@@ -79,6 +79,18 @@ def test_local_remove_all_assert():
assert len(a_op) == 1
def test_local_gpu_contiguous_gpu_contiguous():
    """Nested gpu_contiguous calls must be collapsed to a single op.

    Whether the graph contains one or two stacked GpuContiguous nodes,
    the compiled function should keep exactly one of them.
    """
    x = tensor.fmatrix()
    once = basic_ops.gpu_contiguous(x)
    twice = basic_ops.gpu_contiguous(once)
    for out in (once, twice):
        fn = theano.function([x], out, mode=mode_with_gpu)
        n_contig = sum(isinstance(apply_node.op, basic_ops.GpuContiguous)
                       for apply_node in fn.maker.fgraph.toposort())
        assert n_contig == 1
def test_int_pow():
a = CudaNdarrayType([False])()
......
......@@ -295,7 +295,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
def c_code_cache_version(self):
# return ()
return (7,)
return (8,)
def c_headers(self):
return ['cuda.h', '<gpuarray/extension.h>', '<numpy_compat.h>']
......@@ -351,6 +351,13 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
"dnll.shape[0] != y_idx.shape[0]");
%(fail)s;
}
if (PyGpuArray_DIMS(%(sm)s)[0] !=
PyGpuArray_DIMS(%(y_idx)s)[0])
{
PyErr_SetString(PyExc_ValueError,
"sm.shape[0] != y_idx.shape[0]");
%(fail)s;
}
if ((NULL == %(dx)s)
|| (PyGpuArray_DIMS(%(dx)s)[0] !=
PyGpuArray_DIMS(%(sm)s)[0])
......
......@@ -20,7 +20,7 @@ from theano.tensor.nnet.conv import ConvOp
from .type import GpuArrayType, GpuArrayConstant
from .basic_ops import (host_from_gpu, gpu_from_host,
HostFromGpu, GpuFromHost,
GpuSplit,
GpuSplit, GpuContiguous,
gpu_alloc, GpuAlloc, GpuReshape,
GpuEye, gpu_join, GpuJoin)
from .blas import gpu_dot22, GpuGemv, GpuGemm, GpuGer
......@@ -205,6 +205,19 @@ def local_gpualloc_memset_0(node):
return [new_out]
@register_opt()
@local_optimizer([GpuContiguous])
def local_gpu_contiguous_gpu_contiguous(node):
    """
    gpu_contiguous(gpu_contiguous(x)) -> gpu_contiguous(x)
    """
    # Bail out early for anything that is not a GpuContiguous apply.
    if not isinstance(node.op, GpuContiguous):
        return
    arg = node.inputs[0]
    # A GpuContiguous output is already C-contiguous, so applying the op
    # a second time is a no-op: forward the inner result.
    if arg.owner is not None and isinstance(arg.owner.op, GpuContiguous):
        return [arg]
@register_opt('fast_compile')
@op_lifter([tensor.Reshape])
def local_gpureshape(node):
......
......@@ -7,6 +7,7 @@ from theano.tests.unittest_tools import SkipTest
from theano.tensor.tests import test_basic
import theano.sandbox.gpuarray
from .. import basic_ops
from ..type import GpuArrayType, gpuarray_shared_constructor
from ..basic_ops import (GpuAlloc, GpuReshape, gpu_alloc,
gpu_from_host, host_from_gpu)
......@@ -48,6 +49,18 @@ def test_local_remove_all_assert():
assert len(a_op) == 1
def test_local_gpu_contiguous_gpu_contiguous():
    """Check that stacked gpu_contiguous ops are merged by the optimizer.

    Compiling either the single or the doubled gpu_contiguous graph must
    leave exactly one GpuContiguous node in the optimized function.
    """
    inp = tensor.fmatrix()
    single = basic_ops.gpu_contiguous(inp)
    double = basic_ops.gpu_contiguous(single)
    for graph_out in (single, double):
        compiled = theano.function([inp], graph_out, mode=mode_with_gpu)
        contig_nodes = [n for n in compiled.maker.fgraph.toposort()
                        if isinstance(n.op, basic_ops.GpuContiguous)]
        assert len(contig_nodes) == 1
def test_flatten():
m = theano.tensor.fmatrix()
f = theano.function([m], m.flatten(), mode=mode_with_gpu)
......
......@@ -177,7 +177,8 @@ def default_blas_ldflags():
# Now test it!
x = theano.tensor.fmatrix()
try:
theano.function([x], theano.tensor.blas._dot22(x,x))
theano.function([x], theano.tensor.blas._dot22(x,x),
profile=False)
except Exception as e:
print e
yield ""
......
......@@ -1108,7 +1108,7 @@ class CrossentropySoftmax1HotWithBiasDx(gof.Op):
return [g_dy, g_sm, g_y_idx]
def c_code_cache_version(self):
return (4,)
return (5,)
def c_code(self, node, name, inp, out, sub):
dnll, sm, y_idx = inp
......@@ -1167,6 +1167,13 @@ class CrossentropySoftmax1HotWithBiasDx(gof.Op):
(long int)PyArray_DIMS(%(y_idx)s)[0]);
%(fail)s;
}
if (PyArray_DIMS(%(sm)s)[0] !=
PyArray_DIMS(%(y_idx)s)[0])
{
PyErr_SetString(PyExc_ValueError,
"sm.shape[0] != y_idx.shape[0]");
%(fail)s;
}
if ((NULL == %(dx)s)
|| (PyArray_DIMS(%(dx)s)[0] != PyArray_DIMS(%(sm)s)[0])
|| (PyArray_DIMS(%(dx)s)[1] != PyArray_DIMS(%(sm)s)[1]))
......
Markdown is supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment