Commit 6253b797 authored by abergeron

Merge pull request #1717 from nouiz/faster_opt

Faster opt
......@@ -166,9 +166,10 @@ yourself. Here is some code that will help you.
cd OpenBLAS
make FC=gfortran
sudo make PREFIX=/usr/local/ install
cd /usr/local/lib
ln -s libopenblas.so /usr/lib/libblas.so
ln -s libopenblas.so.0 /usr/lib/libblas.so.3gf
# Tell Theano to use OpenBLAS.
# This work only for the current user.
# Each Theano user on that computer should run that line.
echo -e "\n[blas]\nldflags = -lopenblas\n" >> ~/.theanorc
Contributed GPU instruction
......
......@@ -787,8 +787,8 @@ class ProfileStats(object):
if self.variable_shape or self.variable_strides:
self.summary_memory(file, n_apply_to_print)
if self.optimizer_profile:
print "Optimizer Profile"
print "-----------------"
print >> file, "Optimizer Profile"
print >> file, "-----------------"
self.optimizer_profile[0].print_profile(file,
self.optimizer_profile[1])
......
......@@ -1252,7 +1252,7 @@ class NavigatorOptimizer(Optimizer):
pruner(node)
if chin is not None:
def on_change_input(self, fgraph, node, i, r, new_r, reason):
chin(node, i, r, new_r)
chin(node, i, r, new_r, reason)
u = Updater()
fgraph.attach_feature(u)
......@@ -1701,7 +1701,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
lopt))
count_opt = []
not_used = 0
not_used = []
not_used_time = 0
process_count = {}
for o in opt.global_optimizers + list(opt.get_local_optimizers()):
......@@ -1713,7 +1713,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
if count > 0:
count_opt.append((time_opts[opt], count, opt))
else:
not_used += 1
not_used.append((time_opts[opt], opt))
not_used_time += time_opts[opt]
if count_opt:
......@@ -1724,7 +1724,10 @@ class EquilibriumOptimizer(NavigatorOptimizer):
print >> stream, blanc, ' %.3fs - %d - %s' % (
t, count, opt)
print >> stream, blanc, ' %.3fs - in %d optimization that where not used' % (
not_used_time, not_used)
not_used_time, len(not_used))
not_used.sort()
for (t, opt) in not_used[::-1]:
print >> stream, blanc + " ", ' %.3fs - %s' % (t, opt)
print >> stream
@staticmethod
......
......@@ -76,11 +76,11 @@ class GpuElemwise(HideC, Elemwise):
try:
inps = [make_argument(i, 'i%d' % (n,)) for n, i in
enumerate(node.inputs)]
scal_ins = [scalar.Scalar(i.dtype) for i in node.inputs]
scal_ins = [scalar.get_scalar_type(i.dtype) for i in node.inputs]
outs = [make_argument(o, 'o%d' % (n,)) for n, o in
enumerate(node.outputs) if not n in self.inplace_pattern]
scal_out = [scalar.Scalar(o.dtype) for o in node.outputs]
scal_out = [scalar.get_scalar_type(o.dtype) for o in node.outputs]
fake_node = Apply(self.scalar_op, [i() for i in scal_ins],
[o() for o in scal_out])
......@@ -103,11 +103,11 @@ class GpuElemwise(HideC, Elemwise):
def generate_kernel(self, node, nodename):
inps = [make_argument(i, 'i%d' % (n,)) for n, i in
enumerate(node.inputs)]
scal_ins = [scalar.Scalar(i.dtype) for i in node.inputs]
scal_ins = [scalar.get_scalar_type(i.dtype) for i in node.inputs]
outs = [make_argument(o, 'o%d' % (n,)) for n, o in
enumerate(node.outputs) if not n in self.inplace_pattern]
scal_out = [scalar.Scalar(o.dtype) for o in node.outputs]
scal_out = [scalar.get_scalar_type(o.dtype) for o in node.outputs]
fake_node = Apply(self.scalar_op, [i() for i in scal_ins],
[o() for o in scal_out])
......
......@@ -69,6 +69,18 @@ def upcast(dtype, *dtypes):
return rval
def get_scalar_type(dtype):
    """
    Return a ``Scalar(dtype)`` object.

    Instances are cached per dtype (in ``get_scalar_type.cache``) to save
    allocation and construction time: repeated calls with the same dtype
    string return the very same ``Scalar`` object.

    :param dtype: dtype name accepted by ``Scalar`` (e.g. ``'float32'``).
    :return: the cached ``Scalar`` instance for *dtype*.
    """
    if dtype not in get_scalar_type.cache:
        get_scalar_type.cache[dtype] = Scalar(dtype=dtype)
    return get_scalar_type.cache[dtype]
# The cache is stored as a function attribute so it is created exactly once
# at module import time and shared by every caller.
get_scalar_type.cache = {}
def as_scalar(x, name=None):
if isinstance(x, gof.Apply):
if len(x.outputs) != 1:
......@@ -91,7 +103,7 @@ def constant(x):
# purpose typically.
if hasattr(x, 'dtype'):
assert x.ndim == 0
return ScalarConstant(Scalar(str(x.dtype)), x)
return ScalarConstant(get_scalar_type(str(x.dtype)), x)
if isinstance(x, builtin_float):
for dtype in ['float32', 'float64']:
x_ = theano._asarray(x, dtype=dtype)
......@@ -99,7 +111,7 @@ def constant(x):
break
x_ = None
assert x_ is not None
return ScalarConstant(Scalar(str(x_.dtype)), x)
return ScalarConstant(get_scalar_type(str(x_.dtype)), x)
if isinstance(x, builtin_int):
for dtype in ['int8', 'int16', 'int32', 'int64']:
x_ = theano._asarray(x, dtype=dtype)
......@@ -107,7 +119,7 @@ def constant(x):
break
x_ = None
assert x_ is not None
return ScalarConstant(Scalar(str(x_.dtype)), x)
return ScalarConstant(get_scalar_type(str(x_.dtype)), x)
if isinstance(x, builtin_complex):
#TODO: We have added the complex type, so this should be tested
raise NotImplementedError()
......@@ -457,18 +469,18 @@ theano.compile.register_view_op_c_code(
1)
int8 = Scalar('int8')
int16 = Scalar('int16')
int32 = Scalar('int32')
int64 = Scalar('int64')
uint8 = Scalar('uint8')
uint16 = Scalar('uint16')
uint32 = Scalar('uint32')
uint64 = Scalar('uint64')
float32 = Scalar('float32')
float64 = Scalar('float64')
complex64 = Scalar('complex64')
complex128 = Scalar('complex128')
int8 = get_scalar_type('int8')
int16 = get_scalar_type('int16')
int32 = get_scalar_type('int32')
int64 = get_scalar_type('int64')
uint8 = get_scalar_type('uint8')
uint16 = get_scalar_type('uint16')
uint32 = get_scalar_type('uint32')
uint64 = get_scalar_type('uint64')
float32 = get_scalar_type('float32')
float64 = get_scalar_type('float64')
complex64 = get_scalar_type('complex64')
complex128 = get_scalar_type('complex128')
int_types = int8, int16, int32, int64
uint_types = uint8, uint16, uint32, uint64
......@@ -584,7 +596,7 @@ class _scalar_py_operators:
# The second is needed for Elemwise ops to work right
if dtype is None:
dtype = str(self.type.dtype)
return second(self, ScalarConstant(Scalar(dtype), 0))
return second(self, ScalarConstant(get_scalar_type(dtype), 0))
def astype(self, dtype):
return cast(self, dtype)
......@@ -628,7 +640,8 @@ complexs128 = _multi(complex128)
# necessary to use this same mechanism in other places as well in the future.
class upcast_out(object):
def __new__(self, *types):
return Scalar(dtype=Scalar.upcast(*types)),
dtype = Scalar.upcast(*types)
return get_scalar_type(dtype),
class upgrade_to_float(object):
......@@ -644,7 +657,7 @@ class upgrade_to_float(object):
uint16: float32,
uint32: float64,
uint64: float64}
return Scalar(Scalar.upcast(*[conv.get(type, type)
return get_scalar_type(Scalar.upcast(*[conv.get(type, type)
for type in types])),
......@@ -656,7 +669,7 @@ class same_out(object):
def upcast_out_no_complex(*types):
if any([type in complex_types for type in types]):
raise TypeError('complex type are not supported')
return Scalar(dtype=Scalar.upcast(*types)),
return get_scalar_type(dtype=Scalar.upcast(*types)),
def same_out_float_only(type):
......@@ -1455,7 +1468,7 @@ def div_proxy(x, y):
class TrueDiv(BinaryScalarOp):
def output_types(self, types):
if all(t in discrete_types for t in types):
return [Scalar(config.floatX)]
return [get_scalar_type(config.floatX)]
else:
return super(TrueDiv, self).output_types(types)
......
......@@ -59,7 +59,7 @@ def safe_new(x, tag='', dtype=None):
# making the pushout optimization fail
elif isinstance(x, scalar.ScalarVariable):
if dtype:
nw_x = scalar.Scalar(dtype=dtype)()
nw_x = scalar.get_scalar_type(dtype=dtype)()
else:
nw_x = x.type()
nw_x.name = nw_name
......
......@@ -1113,8 +1113,11 @@ class test_structureddot(unittest.TestCase):
utt.assert_allclose(scipy_result, theano_result)
if (not theano.config.mode in ["DebugMode", "DEBUG_MODE"] and
theano.config.cxx):
self.assertFalse(theano_time > overhead_rtol * scipy_time +
overhead_tol)
self.assertFalse(
theano_time > overhead_rtol * scipy_time + overhead_tol,
(theano_time,
overhead_rtol * scipy_time + overhead_tol,
scipy_time, overhead_rtol, overhead_tol))
class DotTests(utt.InferShapeTester):
......
......@@ -993,7 +993,7 @@ class ScalarFromTensor(Op):
assert t.type.broadcastable == ()
return Apply(self,
[t],
[scal.Scalar(dtype=t.type.dtype).make_variable()])
[scal.get_scalar_type(dtype=t.type.dtype).make_variable()])
def perform(self, node, inp, out_):
s, = inp
......
......@@ -8,7 +8,7 @@ import theano
from theano import gof
from theano.gof import Apply, Op
from theano import scalar
from theano.scalar import Scalar
from theano.scalar import Scalar, get_scalar_type
from theano.printing import pprint
from theano.gof.python25 import all, any
from theano.tensor.utils import hash_from_dict
......@@ -515,7 +515,7 @@ class Elemwise(Op):
"""
inputs = map(as_tensor_variable, inputs)
shadow = self.scalar_op.make_node(
*[Scalar(dtype=i.type.dtype)() for i in inputs])
*[get_scalar_type(dtype=i.type.dtype)() for i in inputs])
target_length = max([input.type.ndim for input in inputs])
......@@ -718,7 +718,7 @@ class Elemwise(Op):
def as_scalar(t):
if isinstance(t.type, (NullType, DisconnectedType)):
return t
return Scalar(t.type.dtype)()
return get_scalar_type(t.type.dtype)()
scalar_inputs = map(as_scalar, inputs)
scalar_ograds = map(as_scalar, ograds)
......@@ -1039,9 +1039,9 @@ class Elemwise(Op):
# We generate the C code of the inner loop using the scalar op
task_code = self.scalar_op.c_code(
Apply(self.scalar_op,
[Scalar(dtype=input.type.dtype)()
[get_scalar_type(dtype=input.type.dtype)()
for input in node.inputs],
[Scalar(dtype=output.type.dtype)()
[get_scalar_type(dtype=output.type.dtype)()
for output in node.outputs]),
nodename + '_scalar_',
["%s_i" % s for s in _inames],
......@@ -1161,11 +1161,11 @@ class Elemwise(Op):
# now we insert versions for the ops on which we depend...
scalar_node = Apply(self.scalar_op,
[Scalar(dtype=input.type.dtype)() for input in node.inputs],
[Scalar(dtype=output.type.dtype)() for output in node.outputs])
[get_scalar_type(dtype=input.type.dtype)() for input in node.inputs],
[get_scalar_type(dtype=output.type.dtype)() for output in node.outputs])
version.append(self.scalar_op.c_code_cache_version_apply(scalar_node))
for i in node.inputs + node.outputs:
version.append(Scalar(dtype=i.type.dtype).c_code_cache_version())
version.append(get_scalar_type(dtype=i.type.dtype).c_code_cache_version())
if all(version):
return tuple(version)
else:
......@@ -1531,9 +1531,9 @@ for(int i=0;i<PyArray_NDIM(%(iname)s);i++){
task1_code = self.scalar_op.c_code(
Apply(
self.scalar_op,
[Scalar(dtype=input.type.dtype)()
[get_scalar_type(dtype=input.type.dtype)()
for input in (node.inputs * 2)],
[Scalar(dtype=output.type.dtype)()
[get_scalar_type(dtype=output.type.dtype)()
for input in node.outputs]),
None,
["%s_i" % aname, "%s_i" % inames[0]],
......@@ -1583,11 +1583,11 @@ for(int i=0;i<PyArray_NDIM(%(iname)s);i++){
# now we insert versions for the ops on which we depend...
scalar_node = Apply(self.scalar_op,
[Scalar(dtype=input.type.dtype)() for input in node.inputs],
[Scalar(dtype=output.type.dtype)() for output in node.outputs])
[get_scalar_type(dtype=input.type.dtype)() for input in node.inputs],
[get_scalar_type(dtype=output.type.dtype)() for output in node.outputs])
version.append(self.scalar_op.c_code_cache_version_apply(scalar_node))
for i in node.inputs + node.outputs:
version.append(Scalar(dtype=i.type.dtype).c_code_cache_version())
version.append(get_scalar_type(dtype=i.type.dtype).c_code_cache_version())
if all(version):
return tuple(version)
else:
......@@ -1665,7 +1665,7 @@ class CAReduceDtype(CAReduce):
def __init__(self, scalar_op, axis=None, dtype=None, acc_dtype=None):
"""
Usage: CAReduceDtype(scalar_op, axis=None, dtype=None)
Usage: CAReduceDtype(scalar_op, axis=None, dtype=None, acc_dtype=None)
:param scalar_op: a binary scalar op with only one output.
It must be commutative and associative.
......
......@@ -162,7 +162,7 @@ class T_sigmoid_opts(unittest.TestCase):
f = theano.function([x], (T.fill(x, -1.0) * T.exp(x)) /
((1 + T.exp(x)) * (1 + T.exp(-x))), mode=m)
assert [node.op for node in f.maker.fgraph.toposort()] == [sigmoid,
T.mul, theano.tensor.inplace.neg_inplace]
T.mul]
f(data)
f = theano.function([x], (T.fill(x, -1.1) * T.exp(x)) /
((1 + T.exp(x)) * (1 + T.exp(-x))), mode=m)
......@@ -238,7 +238,7 @@ class T_sigmoid_opts(unittest.TestCase):
tensor.exp(x * y) * tensor.exp(y)),
mode=m)
match(f, [sigmoid, tensor.mul, tensor.neg, tensor.exp, sigmoid,
tensor.mul, tensor.neg])
tensor.mul])
def test_perform_sigm_times_exp(self):
"""
......
Diff is collapsed.
......@@ -318,11 +318,11 @@ class Subtensor(Op):
if (isinstance(entry, gof.Variable)
and entry.type in tensor_types
and numpy.all(entry.type.broadcastable)):
return scal.Scalar(entry.type.dtype)
return scal.get_scalar_type(entry.type.dtype)
elif (isinstance(entry, gof.Type)
and entry in tensor_types
and numpy.all(entry.broadcastable)):
return scal.Scalar(entry.dtype)
return scal.get_scalar_type(entry.dtype)
elif slice_ok and isinstance(entry, slice):
a = entry.start
b = entry.stop
......
......@@ -2838,7 +2838,7 @@ def test_local_mul_specialize():
nodes = [node.op for node in f.maker.fgraph.toposort()]
print nodes
theano.printing.debugprint(f)
assert nodes == [T.mul, inplace.neg_inplace]
assert nodes == [T.mul]
f = function([v, m], v * 0 * (-m), mode=mode)
nodes = [node.op for node in f.maker.fgraph.toposort()]
......@@ -2852,6 +2852,12 @@ def test_local_mul_specialize():
theano.printing.debugprint(f)
assert nodes == [T.mul]
f = function([v, m], v * (-1) * m, mode=mode)
nodes = [node.op for node in f.maker.fgraph.toposort()]
print nodes
theano.printing.debugprint(f)
assert nodes == [T.mul]
def speed_local_pow_specialize_range():
val = numpy.random.rand(1e7)
......@@ -4000,27 +4006,6 @@ def test_local_join_1():
assert f.maker.fgraph.outputs[0].dtype == config.floatX
def test_local_mul_to_neg():
"""
Test that a multiplication by -1 or -1.0 yields the appropriate data type
"""
a = T.imatrix()
f1 = theano.function([a], -1 * a)
f2 = theano.function([a], -1.0 * a)
aval = numpy.random.randint(0, 10, (2, 2)).astype('int32')
if config.cast_policy == 'custom':
assert f1(aval).dtype == a.dtype
assert f2(aval).dtype == 'float64'
elif config.cast_policy == 'numpy':
assert f1(aval).dtype == str(numpy.array(0).dtype)
assert f2(aval).dtype == 'float64'
elif config.cast_policy == 'numpy+floatX':
assert f1(aval).dtype == str(numpy.array(0).dtype)
assert f2(aval).dtype == config.floatX
else:
raise NotImplementedError(config.cast_policy)
def test_local_add_specialize():
# test of non-zero dimension
a = tensor.vector()
......
......@@ -240,7 +240,7 @@ class TensorType(Type):
% (self.__class__.__name__, self.dtype))
def to_scalar_type(self):
return scal.Scalar(dtype=self.dtype)
return scal.get_scalar_type(dtype=self.dtype)
def __eq__(self, other):
"""Compare True iff other is the same kind of TensorType"""
......@@ -538,23 +538,23 @@ class TensorType(Type):
def c_headers(self):
"""Override `CLinkerObject.c_headers` """
return scal.Scalar(self.dtype).c_headers()
return scal.get_scalar_type(self.dtype).c_headers()
def c_libraries(self):
return scal.Scalar(self.dtype).c_libraries()
return scal.get_scalar_type(self.dtype).c_libraries()
def c_compile_args(self):
return scal.Scalar(self.dtype).c_compile_args()
return scal.get_scalar_type(self.dtype).c_compile_args()
def c_support_code(self):
"""Override `CLinkerObject.c_support_code` """
return scal.Scalar(self.dtype).c_support_code()
return scal.get_scalar_type(self.dtype).c_support_code()
def c_init_code(self):
return scal.Scalar(self.dtype).c_init_code()
return scal.get_scalar_type(self.dtype).c_init_code()
def c_code_cache_version(self):
scalar_version = scal.Scalar(self.dtype).c_code_cache_version()
scalar_version = scal.get_scalar_type(self.dtype).c_code_cache_version()
if scalar_version:
return (11,) + scalar_version
else:
......
Markdown format
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment