提交 c3f14dd4 authored 作者: abergeron's avatar abergeron

Merge pull request #2812 from nouiz/mixed

Mixed: more error detection, clean up, optimization
...@@ -421,7 +421,7 @@ def pfunc(params, outputs=None, mode=None, updates=None, givens=None, ...@@ -421,7 +421,7 @@ def pfunc(params, outputs=None, mode=None, updates=None, givens=None,
if profile is None: if profile is None:
profile = config.profile profile = config.profile
# profile -> True or False # profile -> True or False
if profile == True: if profile is True:
profile = ProfileStats(message=name) profile = ProfileStats(message=name)
# profile -> object # profile -> object
if type(profile) == str: if type(profile) == str:
......
...@@ -635,7 +635,8 @@ class ModuleCache(object): ...@@ -635,7 +635,8 @@ class ModuleCache(object):
self.stats[0] += 1 self.stats[0] += 1
return self.module_from_name[name] return self.module_from_name[name]
def refresh(self, age_thresh_use=None, delete_if_problem=False, cleanup=True): def refresh(self, age_thresh_use=None, delete_if_problem=False,
cleanup=True):
"""Update cache data by walking the cache directory structure. """Update cache data by walking the cache directory structure.
Load key.pkl files that have not been loaded yet. Load key.pkl files that have not been loaded yet.
......
...@@ -203,7 +203,8 @@ def version(): ...@@ -203,7 +203,8 @@ def version():
if version.v is None: if version.v is None:
f = theano.function([], DnnVersion()(), f = theano.function([], DnnVersion()(),
theano.Mode(optimizer=None)) theano.Mode(optimizer=None),
profile=False)
version.v = f() version.v = f()
return version.v return version.v
version.v = None version.v = None
......
...@@ -247,7 +247,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuOp): ...@@ -247,7 +247,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuOp):
def c_code_cache_version(self): def c_code_cache_version(self):
# return () # return ()
return (7,) return (8,)
def c_code(self, node, nodename, inp, out, sub): def c_code(self, node, nodename, inp, out, sub):
dnll, sm, y_idx = inp dnll, sm, y_idx = inp
...@@ -288,6 +288,13 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuOp): ...@@ -288,6 +288,13 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuOp):
"dnll.shape[0] != y_idx.shape[0]"); "dnll.shape[0] != y_idx.shape[0]");
%(fail)s; %(fail)s;
} }
if (CudaNdarray_HOST_DIMS(%(sm)s)[0] !=
CudaNdarray_HOST_DIMS(%(y_idx)s)[0])
{
PyErr_SetString(PyExc_ValueError,
"sm.shape[0] != y_idx.shape[0]");
%(fail)s;
}
if ((NULL == %(dx)s) if ((NULL == %(dx)s)
|| (CudaNdarray_HOST_DIMS(%(dx)s)[0] != || (CudaNdarray_HOST_DIMS(%(dx)s)[0] !=
CudaNdarray_HOST_DIMS(%(sm)s)[0]) CudaNdarray_HOST_DIMS(%(sm)s)[0])
......
...@@ -20,6 +20,7 @@ from theano.gof.opt import LocalMetaOptimizer ...@@ -20,6 +20,7 @@ from theano.gof.opt import LocalMetaOptimizer
from theano.sandbox.cuda.basic_ops import ( from theano.sandbox.cuda.basic_ops import (
gpu_eye, gpu_contiguous, gpu_eye, gpu_contiguous,
gpu_from_host, host_from_gpu, GpuFromHost, HostFromGpu, gpu_from_host, host_from_gpu, GpuFromHost, HostFromGpu,
GpuContiguous,
GpuElemwise, GpuDimShuffle, GpuReshape, GpuCAReduce, GpuFlatten, GpuElemwise, GpuDimShuffle, GpuReshape, GpuCAReduce, GpuFlatten,
GpuSubtensor, GpuAdvancedSubtensor1, GpuSubtensor, GpuAdvancedSubtensor1,
GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1_dev20, GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1_dev20,
...@@ -1997,6 +1998,19 @@ def local_gpualloc_memset_0(node): ...@@ -1997,6 +1998,19 @@ def local_gpualloc_memset_0(node):
return [new_out] return [new_out]
@register_opt()
@local_optimizer([GpuContiguous])
def local_gpu_contiguous_gpu_contiguous(node):
    """
    gpu_contiguous(gpu_contiguous(x)) -> gpu_contiguous(x)

    The inner gpu_contiguous already guarantees a C-contiguous
    output, so the outer one is redundant and can be dropped.
    """
    if not isinstance(node.op, GpuContiguous):
        return
    inner = node.inputs[0]
    if inner.owner is None:
        return
    if not isinstance(inner.owner.op, GpuContiguous):
        return
    # Replace the outer gpu_contiguous with the (identical) inner one.
    return [inner]
@register_opt() @register_opt()
@local_optimizer([gpu_from_host, tensor.Eye]) @local_optimizer([gpu_from_host, tensor.Eye])
def local_gpu_eye(node): def local_gpu_eye(node):
......
...@@ -79,6 +79,18 @@ def test_local_remove_all_assert(): ...@@ -79,6 +79,18 @@ def test_local_remove_all_assert():
assert len(a_op) == 1 assert len(a_op) == 1
def test_local_gpu_contiguous_gpu_contiguous():
    """Check that nested gpu_contiguous calls collapse to a single node."""
    x = tensor.fmatrix()
    once = basic_ops.gpu_contiguous(x)
    twice = basic_ops.gpu_contiguous(once)
    # Whether gpu_contiguous is applied once or twice, the optimized
    # graph must contain exactly one GpuContiguous node.
    for out in (once, twice):
        fn = theano.function([x], out, mode=mode_with_gpu)
        n_contig = sum(isinstance(apply_node.op, basic_ops.GpuContiguous)
                       for apply_node in fn.maker.fgraph.toposort())
        assert n_contig == 1
def test_int_pow(): def test_int_pow():
a = CudaNdarrayType([False])() a = CudaNdarrayType([False])()
......
...@@ -295,7 +295,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op): ...@@ -295,7 +295,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
def c_code_cache_version(self): def c_code_cache_version(self):
# return () # return ()
return (7,) return (8,)
def c_headers(self): def c_headers(self):
return ['cuda.h', '<gpuarray/extension.h>', '<numpy_compat.h>'] return ['cuda.h', '<gpuarray/extension.h>', '<numpy_compat.h>']
...@@ -351,6 +351,13 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op): ...@@ -351,6 +351,13 @@ class GpuCrossentropySoftmax1HotWithBiasDx(Op):
"dnll.shape[0] != y_idx.shape[0]"); "dnll.shape[0] != y_idx.shape[0]");
%(fail)s; %(fail)s;
} }
if (PyGpuArray_DIMS(%(sm)s)[0] !=
PyGpuArray_DIMS(%(y_idx)s)[0])
{
PyErr_SetString(PyExc_ValueError,
"sm.shape[0] != y_idx.shape[0]");
%(fail)s;
}
if ((NULL == %(dx)s) if ((NULL == %(dx)s)
|| (PyGpuArray_DIMS(%(dx)s)[0] != || (PyGpuArray_DIMS(%(dx)s)[0] !=
PyGpuArray_DIMS(%(sm)s)[0]) PyGpuArray_DIMS(%(sm)s)[0])
......
...@@ -20,7 +20,7 @@ from theano.tensor.nnet.conv import ConvOp ...@@ -20,7 +20,7 @@ from theano.tensor.nnet.conv import ConvOp
from .type import GpuArrayType, GpuArrayConstant from .type import GpuArrayType, GpuArrayConstant
from .basic_ops import (host_from_gpu, gpu_from_host, from .basic_ops import (host_from_gpu, gpu_from_host,
HostFromGpu, GpuFromHost, HostFromGpu, GpuFromHost,
GpuSplit, GpuSplit, GpuContiguous,
gpu_alloc, GpuAlloc, GpuReshape, gpu_alloc, GpuAlloc, GpuReshape,
GpuEye, gpu_join, GpuJoin) GpuEye, gpu_join, GpuJoin)
from .blas import gpu_dot22, GpuGemv, GpuGemm, GpuGer from .blas import gpu_dot22, GpuGemv, GpuGemm, GpuGer
...@@ -205,6 +205,19 @@ def local_gpualloc_memset_0(node): ...@@ -205,6 +205,19 @@ def local_gpualloc_memset_0(node):
return [new_out] return [new_out]
@register_opt()
@local_optimizer([GpuContiguous])
def local_gpu_contiguous_gpu_contiguous(node):
    """
    gpu_contiguous(gpu_contiguous(x)) -> gpu_contiguous(x)

    A single gpu_contiguous already makes its output C-contiguous,
    so applying it twice in a row is redundant.
    """
    if not isinstance(node.op, GpuContiguous):
        return
    inner = node.inputs[0]
    if inner.owner is None:
        return
    if not isinstance(inner.owner.op, GpuContiguous):
        return
    # The inner gpu_contiguous output can stand in for the outer one.
    return [inner]
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([tensor.Reshape]) @op_lifter([tensor.Reshape])
def local_gpureshape(node): def local_gpureshape(node):
......
...@@ -7,6 +7,7 @@ from theano.tests.unittest_tools import SkipTest ...@@ -7,6 +7,7 @@ from theano.tests.unittest_tools import SkipTest
from theano.tensor.tests import test_basic from theano.tensor.tests import test_basic
import theano.sandbox.gpuarray import theano.sandbox.gpuarray
from .. import basic_ops
from ..type import GpuArrayType, gpuarray_shared_constructor from ..type import GpuArrayType, gpuarray_shared_constructor
from ..basic_ops import (GpuAlloc, GpuReshape, gpu_alloc, from ..basic_ops import (GpuAlloc, GpuReshape, gpu_alloc,
gpu_from_host, host_from_gpu) gpu_from_host, host_from_gpu)
...@@ -48,6 +49,18 @@ def test_local_remove_all_assert(): ...@@ -48,6 +49,18 @@ def test_local_remove_all_assert():
assert len(a_op) == 1 assert len(a_op) == 1
def test_local_gpu_contiguous_gpu_contiguous():
    """Nested gpu_contiguous must be merged into one GpuContiguous node."""
    x = tensor.fmatrix()
    once = basic_ops.gpu_contiguous(x)
    twice = basic_ops.gpu_contiguous(once)
    # Both graphs should compile down to exactly one GpuContiguous apply.
    for out in (once, twice):
        fn = theano.function([x], out, mode=mode_with_gpu)
        n_contig = sum(isinstance(apply_node.op, basic_ops.GpuContiguous)
                       for apply_node in fn.maker.fgraph.toposort())
        assert n_contig == 1
def test_flatten(): def test_flatten():
m = theano.tensor.fmatrix() m = theano.tensor.fmatrix()
f = theano.function([m], m.flatten(), mode=mode_with_gpu) f = theano.function([m], m.flatten(), mode=mode_with_gpu)
......
...@@ -177,7 +177,8 @@ def default_blas_ldflags(): ...@@ -177,7 +177,8 @@ def default_blas_ldflags():
# Now test it! # Now test it!
x = theano.tensor.fmatrix() x = theano.tensor.fmatrix()
try: try:
theano.function([x], theano.tensor.blas._dot22(x,x)) theano.function([x], theano.tensor.blas._dot22(x,x),
profile=False)
except Exception as e: except Exception as e:
print e print e
yield "" yield ""
......
...@@ -1108,7 +1108,7 @@ class CrossentropySoftmax1HotWithBiasDx(gof.Op): ...@@ -1108,7 +1108,7 @@ class CrossentropySoftmax1HotWithBiasDx(gof.Op):
return [g_dy, g_sm, g_y_idx] return [g_dy, g_sm, g_y_idx]
def c_code_cache_version(self): def c_code_cache_version(self):
return (4,) return (5,)
def c_code(self, node, name, inp, out, sub): def c_code(self, node, name, inp, out, sub):
dnll, sm, y_idx = inp dnll, sm, y_idx = inp
...@@ -1167,6 +1167,13 @@ class CrossentropySoftmax1HotWithBiasDx(gof.Op): ...@@ -1167,6 +1167,13 @@ class CrossentropySoftmax1HotWithBiasDx(gof.Op):
(long int)PyArray_DIMS(%(y_idx)s)[0]); (long int)PyArray_DIMS(%(y_idx)s)[0]);
%(fail)s; %(fail)s;
} }
if (PyArray_DIMS(%(sm)s)[0] !=
PyArray_DIMS(%(y_idx)s)[0])
{
PyErr_SetString(PyExc_ValueError,
"sm.shape[0] != y_idx.shape[0]");
%(fail)s;
}
if ((NULL == %(dx)s) if ((NULL == %(dx)s)
|| (PyArray_DIMS(%(dx)s)[0] != PyArray_DIMS(%(sm)s)[0]) || (PyArray_DIMS(%(dx)s)[0] != PyArray_DIMS(%(sm)s)[0])
|| (PyArray_DIMS(%(dx)s)[1] != PyArray_DIMS(%(sm)s)[1])) || (PyArray_DIMS(%(dx)s)[1] != PyArray_DIMS(%(sm)s)[1]))
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论