提交 e67bed64 authored 作者: Frederic's avatar Frederic

add theano.scalar.get_scalar_type(dtype) to cache Scalar(dtype) object.

This speeds up optimization.
上级 dc91b8f5
......@@ -72,11 +72,11 @@ class GpuElemwise(HideC, Elemwise):
try:
inps = [make_argument(i, 'i%d' % (n,)) for n, i in
enumerate(node.inputs)]
scal_ins = [scalar.Scalar(i.dtype) for i in node.inputs]
scal_ins = [scalar.get_scalar_type(i.dtype) for i in node.inputs]
outs = [make_argument(o, 'o%d' % (n,)) for n, o in
enumerate(node.outputs) if not n in self.inplace_pattern]
scal_out = [scalar.Scalar(o.dtype) for o in node.outputs]
scal_out = [scalar.get_scalar_type(o.dtype) for o in node.outputs]
fake_node = Apply(self.scalar_op, [i() for i in scal_ins],
[o() for o in scal_out])
......@@ -99,11 +99,11 @@ class GpuElemwise(HideC, Elemwise):
def generate_kernel(self, node, nodename):
inps = [make_argument(i, 'i%d' % (n,)) for n, i in
enumerate(node.inputs)]
scal_ins = [scalar.Scalar(i.dtype) for i in node.inputs]
scal_ins = [scalar.get_scalar_type(i.dtype) for i in node.inputs]
outs = [make_argument(o, 'o%d' % (n,)) for n, o in
enumerate(node.outputs) if not n in self.inplace_pattern]
scal_out = [scalar.Scalar(o.dtype) for o in node.outputs]
scal_out = [scalar.get_scalar_type(o.dtype) for o in node.outputs]
fake_node = Apply(self.scalar_op, [i() for i in scal_ins],
[o() for o in scal_out])
......
......@@ -69,6 +69,18 @@ def upcast(dtype, *dtypes):
return rval
def get_scalar_type(dtype):
    """
    Return a Scalar(dtype) instance, reusing a cached object when possible.

    Caching avoids allocating a fresh Scalar type object for every call,
    which speeds up optimization.
    """
    try:
        # Fast path: the type for this dtype was already built.
        return get_scalar_type.cache[dtype]
    except KeyError:
        scalar_type = Scalar(dtype=dtype)
        get_scalar_type.cache[dtype] = scalar_type
        return scalar_type
get_scalar_type.cache = {}
def as_scalar(x, name=None):
if isinstance(x, gof.Apply):
if len(x.outputs) != 1:
......@@ -91,7 +103,7 @@ def constant(x):
# purpose typically.
if hasattr(x, 'dtype'):
assert x.ndim == 0
return ScalarConstant(Scalar(str(x.dtype)), x)
return ScalarConstant(get_scalar_type(str(x.dtype)), x)
if isinstance(x, builtin_float):
for dtype in ['float32', 'float64']:
x_ = theano._asarray(x, dtype=dtype)
......@@ -99,7 +111,7 @@ def constant(x):
break
x_ = None
assert x_ is not None
return ScalarConstant(Scalar(str(x_.dtype)), x)
return ScalarConstant(get_scalar_type(str(x_.dtype)), x)
if isinstance(x, builtin_int):
for dtype in ['int8', 'int16', 'int32', 'int64']:
x_ = theano._asarray(x, dtype=dtype)
......@@ -107,7 +119,7 @@ def constant(x):
break
x_ = None
assert x_ is not None
return ScalarConstant(Scalar(str(x_.dtype)), x)
return ScalarConstant(get_scalar_type(str(x_.dtype)), x)
if isinstance(x, builtin_complex):
#TODO: We have added the complex type, so this should be tested
raise NotImplementedError()
......@@ -457,18 +469,18 @@ theano.compile.register_view_op_c_code(
1)
int8 = Scalar('int8')
int16 = Scalar('int16')
int32 = Scalar('int32')
int64 = Scalar('int64')
uint8 = Scalar('uint8')
uint16 = Scalar('uint16')
uint32 = Scalar('uint32')
uint64 = Scalar('uint64')
float32 = Scalar('float32')
float64 = Scalar('float64')
complex64 = Scalar('complex64')
complex128 = Scalar('complex128')
int8 = get_scalar_type('int8')
int16 = get_scalar_type('int16')
int32 = get_scalar_type('int32')
int64 = get_scalar_type('int64')
uint8 = get_scalar_type('uint8')
uint16 = get_scalar_type('uint16')
uint32 = get_scalar_type('uint32')
uint64 = get_scalar_type('uint64')
float32 = get_scalar_type('float32')
float64 = get_scalar_type('float64')
complex64 = get_scalar_type('complex64')
complex128 = get_scalar_type('complex128')
int_types = int8, int16, int32, int64
uint_types = uint8, uint16, uint32, uint64
......@@ -584,7 +596,7 @@ class _scalar_py_operators:
# The second is needed for Elemwise ops to work right
if dtype is None:
dtype = str(self.type.dtype)
return second(self, ScalarConstant(Scalar(dtype), 0))
return second(self, ScalarConstant(get_scalar_type(dtype), 0))
def astype(self, dtype):
return cast(self, dtype)
......@@ -628,7 +640,8 @@ complexs128 = _multi(complex128)
# necessary to use this same mechanism in other places as well in the future.
class upcast_out(object):
def __new__(self, *types):
return Scalar(dtype=Scalar.upcast(*types)),
dtype = Scalar.upcast(*types)
return get_scalar_type(dtype),
class upgrade_to_float(object):
......@@ -644,7 +657,7 @@ class upgrade_to_float(object):
uint16: float32,
uint32: float64,
uint64: float64}
return Scalar(Scalar.upcast(*[conv.get(type, type)
return get_scalar_type(Scalar.upcast(*[conv.get(type, type)
for type in types])),
......@@ -656,7 +669,7 @@ class same_out(object):
def upcast_out_no_complex(*types):
if any([type in complex_types for type in types]):
raise TypeError('complex type are not supported')
return Scalar(dtype=Scalar.upcast(*types)),
return get_scalar_type(dtype=Scalar.upcast(*types)),
def same_out_float_only(type):
......@@ -1455,7 +1468,7 @@ def div_proxy(x, y):
class TrueDiv(BinaryScalarOp):
def output_types(self, types):
if all(t in discrete_types for t in types):
return [Scalar(config.floatX)]
return [get_scalar_type(config.floatX)]
else:
return super(TrueDiv, self).output_types(types)
......
......@@ -59,7 +59,7 @@ def safe_new(x, tag='', dtype=None):
# making the pushout optimization fail
elif isinstance(x, scalar.ScalarVariable):
if dtype:
nw_x = scalar.Scalar(dtype=dtype)()
nw_x = scalar.get_scalar_type(dtype=dtype)()
else:
nw_x = x.type()
nw_x.name = nw_name
......
......@@ -991,7 +991,7 @@ class ScalarFromTensor(Op):
assert t.type.broadcastable == ()
return Apply(self,
[t],
[scal.Scalar(dtype=t.type.dtype).make_variable()])
[scal.get_scalar_type(dtype=t.type.dtype).make_variable()])
def perform(self, node, inp, out_):
s, = inp
......
......@@ -8,7 +8,7 @@ import theano
from theano import gof
from theano.gof import Apply, Op
from theano import scalar
from theano.scalar import Scalar
from theano.scalar import Scalar, get_scalar_type
from theano.printing import pprint
from theano.gof.python25 import all, any
from theano.tensor.utils import hash_from_dict
......@@ -515,7 +515,7 @@ class Elemwise(Op):
"""
inputs = map(as_tensor_variable, inputs)
shadow = self.scalar_op.make_node(
*[Scalar(dtype=i.type.dtype)() for i in inputs])
*[get_scalar_type(dtype=i.type.dtype)() for i in inputs])
target_length = max([input.type.ndim for input in inputs])
......@@ -718,7 +718,7 @@ class Elemwise(Op):
def as_scalar(t):
if isinstance(t.type, (NullType, DisconnectedType)):
return t
return Scalar(t.type.dtype)()
return get_scalar_type(t.type.dtype)()
scalar_inputs = map(as_scalar, inputs)
scalar_ograds = map(as_scalar, ograds)
......@@ -1039,9 +1039,9 @@ class Elemwise(Op):
# We generate the C code of the inner loop using the scalar op
task_code = self.scalar_op.c_code(
Apply(self.scalar_op,
[Scalar(dtype=input.type.dtype)()
[get_scalar_type(dtype=input.type.dtype)()
for input in node.inputs],
[Scalar(dtype=output.type.dtype)()
[get_scalar_type(dtype=output.type.dtype)()
for output in node.outputs]),
nodename + '_scalar_',
["%s_i" % s for s in _inames],
......@@ -1161,11 +1161,11 @@ class Elemwise(Op):
# now we insert versions for the ops on which we depend...
scalar_node = Apply(self.scalar_op,
[Scalar(dtype=input.type.dtype)() for input in node.inputs],
[Scalar(dtype=output.type.dtype)() for output in node.outputs])
[get_scalar_type(dtype=input.type.dtype)() for input in node.inputs],
[get_scalar_type(dtype=output.type.dtype)() for output in node.outputs])
version.append(self.scalar_op.c_code_cache_version_apply(scalar_node))
for i in node.inputs + node.outputs:
version.append(Scalar(dtype=i.type.dtype).c_code_cache_version())
version.append(get_scalar_type(dtype=i.type.dtype).c_code_cache_version())
if all(version):
return tuple(version)
else:
......@@ -1531,9 +1531,9 @@ for(int i=0;i<PyArray_NDIM(%(iname)s);i++){
task1_code = self.scalar_op.c_code(
Apply(
self.scalar_op,
[Scalar(dtype=input.type.dtype)()
[get_scalar_type(dtype=input.type.dtype)()
for input in (node.inputs * 2)],
[Scalar(dtype=output.type.dtype)()
[get_scalar_type(dtype=output.type.dtype)()
for input in node.outputs]),
None,
["%s_i" % aname, "%s_i" % inames[0]],
......@@ -1583,11 +1583,11 @@ for(int i=0;i<PyArray_NDIM(%(iname)s);i++){
# now we insert versions for the ops on which we depend...
scalar_node = Apply(self.scalar_op,
[Scalar(dtype=input.type.dtype)() for input in node.inputs],
[Scalar(dtype=output.type.dtype)() for output in node.outputs])
[get_scalar_type(dtype=input.type.dtype)() for input in node.inputs],
[get_scalar_type(dtype=output.type.dtype)() for output in node.outputs])
version.append(self.scalar_op.c_code_cache_version_apply(scalar_node))
for i in node.inputs + node.outputs:
version.append(Scalar(dtype=i.type.dtype).c_code_cache_version())
version.append(get_scalar_type(dtype=i.type.dtype).c_code_cache_version())
if all(version):
return tuple(version)
else:
......
......@@ -3583,7 +3583,7 @@ def local_pow_specialize_device(node):
# 512 is too small for the cpu and too big for some gpu!
if abs(y) == int(abs(y)) and abs(y) <= 512:
pow2 = [xsym]
pow2_scal = [theano.scalar.Scalar(xsym.dtype)()]
pow2_scal = [theano.scalar.get_scalar_type(xsym.dtype)()]
y_to_do = abs(y)
for i in xrange(int(numpy.log2(y_to_do))):
pow2.append(T.sqr(pow2[i]))
......@@ -4638,7 +4638,7 @@ def local_elemwise_fusion_op(OP, max_input_fct=lambda node: 1024):
elif ii in tmp_input:
tmp_s_input.append(tmp_scalar[tmp_input.index(ii)])
else:
tmp = scalar.Scalar(ii.dtype).make_variable()
tmp = scalar.get_scalar_type(ii.dtype).make_variable()
try:
tmp.tag.test_value = gof.op.get_test_value(ii).flatten()[0]
except AttributeError:
......@@ -4692,7 +4692,7 @@ def local_elemwise_fusion_op(OP, max_input_fct=lambda node: 1024):
if inputs.count(i) == node.inputs.count(i):
s = s_inputs[inputs.index(i)]
else:
s = scalar.Scalar(i.dtype).make_variable()
s = scalar.get_scalar_type(i.dtype).make_variable()
try:
if theano.config.compute_test_value != 'off':
v = gof.op.get_test_value(i)
......
......@@ -318,11 +318,11 @@ class Subtensor(Op):
if (isinstance(entry, gof.Variable)
and entry.type in tensor_types
and numpy.all(entry.type.broadcastable)):
return scal.Scalar(entry.type.dtype)
return scal.get_scalar_type(entry.type.dtype)
elif (isinstance(entry, gof.Type)
and entry in tensor_types
and numpy.all(entry.broadcastable)):
return scal.Scalar(entry.dtype)
return scal.get_scalar_type(entry.dtype)
elif slice_ok and isinstance(entry, slice):
a = entry.start
b = entry.stop
......
......@@ -240,7 +240,7 @@ class TensorType(Type):
% (self.__class__.__name__, self.dtype))
def to_scalar_type(self):
return scal.Scalar(dtype=self.dtype)
return scal.get_scalar_type(dtype=self.dtype)
def __eq__(self, other):
"""Compare True iff other is the same kind of TensorType"""
......@@ -538,23 +538,23 @@ class TensorType(Type):
def c_headers(self):
"""Override `CLinkerObject.c_headers` """
return scal.Scalar(self.dtype).c_headers()
return scal.get_scalar_type(self.dtype).c_headers()
def c_libraries(self):
return scal.Scalar(self.dtype).c_libraries()
return scal.get_scalar_type(self.dtype).c_libraries()
def c_compile_args(self):
return scal.Scalar(self.dtype).c_compile_args()
return scal.get_scalar_type(self.dtype).c_compile_args()
def c_support_code(self):
"""Override `CLinkerObject.c_support_code` """
return scal.Scalar(self.dtype).c_support_code()
return scal.get_scalar_type(self.dtype).c_support_code()
def c_init_code(self):
return scal.Scalar(self.dtype).c_init_code()
return scal.get_scalar_type(self.dtype).c_init_code()
def c_code_cache_version(self):
scalar_version = scal.Scalar(self.dtype).c_code_cache_version()
scalar_version = scal.get_scalar_type(self.dtype).c_code_cache_version()
if scalar_version:
return (11,) + scalar_version
else:
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论