feat(jumpavg): speed up, use Python 3.8 features

+ The main speedup comes from abandoning copy.deepcopy(), doing shallow list copies (at most) and introducing copy_fast(). + Turn into dataclasses whenever possible, use type hints. + Simplify the partition search code. + It is now clearer that the time complexity is O(N*(N+n)), where N is the number of samples, and n is the average size of the last group of the current record glist. + Used black for formatting, so no u"" anymore. + Update metadata for 0.3.0 release. Change-Id: I302203b4d42aeb22be1128e2fe72353a44eae5d0 Signed-off-by: Vratko Polak <vrpolak@cisco.com>
author: Vratko Polak <vrpolak@cisco.com> 2022-08-09 14:56:15 +0200
committer: Tibor Frank <tifrank@cisco.com> 2022-08-15 10:58:57 +0000
commit: 4bfbd4d72ad53eb1694868c19640c8b4a17d32cb (patch)
tree: 0a566caa3a9ce141f8045bf22c395833355f3a7c
parent: c1b770bc71eda83468c0e2a97c851b831b76641b (diff)
7 files changed, 221 insertions, 264 deletions
diff --git a/PyPI/jumpavg/README.rst b/PyPI/jumpavg/README.rst
index 3161988f69..f66ea9d39b 100644
--- a/PyPI/jumpavg/README.rst
+++ b/PyPI/jumpavg/README.rst
@@ -25,6 +25,11 @@ Change log
 
 TODO: Move into separate file?
 
+0.3.0: Considerable speedup by avoiding unneeded copy. Dataclasses used.
+       Mostly API compatible, but repr looks different.
+
+0.2.0: API incompatible changes. Targeted to Python 3 now.
+
 0.1.3: Changed stdev computation to avoid negative variance due to rounding errors.
 
 0.1.2: First version published in PyPI.
diff --git a/PyPI/jumpavg/setup.py b/PyPI/jumpavg/setup.py
index 28ddfcf0b1..129277a26e 100644
--- a/PyPI/jumpavg/setup.py
+++ b/PyPI/jumpavg/setup.py
@@ -16,7 +16,7 @@ with open(path.join(here, u"README.rst"), encoding=u"utf-8") as f:
 
 setup(
     name=u"jumpavg",
-    version=u"0.2.0",  # This is currently the only place listing the version.
+    version=u"0.3.0",  # This is currently the only place listing the version.
     description=(
         u"Library for locating changes in time series by grouping results."
     ),
@@ -36,12 +36,12 @@ setup(
         u"License :: OSI Approved :: Apache Software License",
         u"Natural Language :: English",
         # TODO: Test which Python versions is the code compatible with.
-        u"Programming Language :: Python :: 2.7",
+        u"Programming Language :: Python :: 3.8",
         u"Topic :: Scientific/Engineering :: Information Analysis"
     ],
     keywords=u"progression regression anomaly detection statistics bits",
     packages=find_packages(exclude=[]),
-    python_requires="~=3.6",
+    python_requires="~=3.8",
     install_requires=[],
     # TODO: Include simulator and tests.
     extras_require={
diff --git a/resources/libraries/python/jumpavg/AvgStdevStats.py b/resources/libraries/python/jumpavg/AvgStdevStats.py
index 4720c10f3d..d40b316bf1 100644
--- a/resources/libraries/python/jumpavg/AvgStdevStats.py
+++ b/resources/libraries/python/jumpavg/AvgStdevStats.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021 Cisco and/or its affiliates.
+# Copyright (c) 2022 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -13,9 +13,12 @@
 
 """Module holding AvgStdevStats class."""
 
+import dataclasses
 import math
+import typing
 
 
+@dataclasses.dataclass
 class AvgStdevStats:
     """Class for statistics which include average and stdev of a group.
 
@@ -25,45 +28,18 @@ class AvgStdevStats:
     Instances are only statistics, the data itself is stored elsewhere.
     """
 
-    def __init__(self, size=0, avg=0.0, stdev=0.0):
-        """Construct the stats object by storing the values needed.
-
-        Each value has to be numeric.
-        The values are not sanitized depending on size, wrong initialization
-        can cause delayed math errors.
-
-        :param size: Number of values participating in this group.
-        :param avg: Population average of the participating sample values.
-        :param stdev: Population standard deviation of the sample values.
-        :type size: int
-        :type avg: float
-        :type stdev: float
-        """
-        self.size = size
-        self.avg = avg
-        self.stdev = stdev
-
-    def __str__(self):
-        """Return string with human readable description of the group.
-
-        :returns: Readable description.
-        :rtype: str
-        """
-        return f"size={self.size} avg={self.avg} stdev={self.stdev}"
-
-    def __repr__(self):
-        """Return string executable as Python constructor call.
-
-        :returns: Executable constructor call.
-        :rtype: str
-        """
-        return (
-            f"AvgStdevStats(size={self.size!r},avg={self.avg!r}"
-            f",stdev={self.stdev!r})"
-        )
+    size: int = 0
+    """Number of scalar values (samples) participating in this group."""
+    avg: float = 0.0
+    """Population average of the participating sample values."""
+    stdev: float = 0.0
+    """Population standard deviation of the sample values."""
 
     @classmethod
-    def for_runs(cls, runs):
+    def for_runs(
+        cls,
+        runs: typing.Iterable[typing.Union[float, "AvgStdevStats"]],
+    ) -> "AvgStdevStats":
         """Return new stats instance describing the sequence of runs.
 
         If you want to append data to existing stats object,
@@ -72,8 +48,8 @@ class AvgStdevStats:
         Instead of a verb, "for" is used to start this method name,
         to signify the result contains less information than the input data.
 
-        Here, Run is a hypothetical abstract class, an union of float and cls.
-        Defining that as a real abstract class in Python 2 is too much hassle.
+        Here, run is a hypothetical abstract class, a union of float and cls.
+        Defining that as a real abstract class in Python is too much hassle.
 
         :param runs: Sequence of data to describe by the new metadata.
         :type runs: Iterable[Union[float, cls]]
diff --git a/resources/libraries/python/jumpavg/BitCountingGroup.py b/resources/libraries/python/jumpavg/BitCountingGroup.py
index f1bdc502fd..48bea086f4 100644
--- a/resources/libraries/python/jumpavg/BitCountingGroup.py
+++ b/resources/libraries/python/jumpavg/BitCountingGroup.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021 Cisco and/or its affiliates.
+# Copyright (c) 2022 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -13,14 +13,16 @@
 
 """Module holding BitCountingGroup class."""
 
-import copy
+import collections
+import dataclasses
+import typing
 
 from .AvgStdevStats import AvgStdevStats
 from .BitCountingStats import BitCountingStats
 
 
-class BitCountingGroup:
-    # TODO: Inherit from collections.abc.Sequence in Python 3.
+@dataclasses.dataclass
+class BitCountingGroup(collections.abc.Sequence):
     """Group of runs which tracks bit count in an efficient manner.
 
     This class contains methods that mutate the internal state,
@@ -38,74 +40,58 @@ class BitCountingGroup:
     a method to add a single run in an efficient manner is provided.
     """
 
-    def __init__(self, run_list=None, stats=None, bits=None,
-                 max_value=None, prev_avg=None, comment="unknown"):
-        """Set the internal state and partially the stats.
-
-        A "group" stands for an Iterable of runs, where "run" is either
-        a float value, or a stats-like object (only size, avg and stdev
-        are accessed). Run is a hypothetical abstract class,
-        defining it in Python 2 is too much hassle.
-
-        Only a copy of the run list argument value is stored in the instance,
-        so it is not a problem if the value object is mutated afterwards.
+    run_list: typing.List[typing.Union[float, AvgStdevStats]]
+    """List of runs to compose into this group.
+    The init call takes ownership of the list,
+    so the caller should clone it to avoid unexpected mutations."""
+    max_value: float
+    """Maximal sample value to expect."""
+    comment: str = "unknown"
+    """Any string giving more info, e.g. "regression"."""
+    prev_avg: typing.Optional[float] = None
+    """Average of the previous group, if any."""
+    stats: AvgStdevStats = None
+    """Stats object used for computing bits.
+    Almost always recomputed, except when non-None in init."""
+    cached_bits: typing.Optional[float] = None
+    """Cached value of information content.
+    Reset to None on edit, recomputed if needed and None."""
+
+    def __post_init__(self):
+        """Recompute stats if None.
 
         It is not verified whether the user provided values are valid,
         e.g. whether the stats and bits values reflect the runs.
-
-        :param run_list: List of run to compose into this group. Default: empty.
-        :param stats: Stats object used for computing bits.
-        :param bits: Cached value of information content.
-        :param max_value: Maximal sample value to be used for computing.
-        :param prev_avg: Average of the previous group, affects bits.
-        :param comment: Any string giving more info, e.g. "regression".
-        :type run_list: Iterable[Run]
-        :type stats: Optional[AvgStdevStats]
-        :type bits: Optional[float]
-        :type max_value: float
-        :type prev_avg: Optional[float]
-        :type comment: str
         """
-        self.run_list = copy.deepcopy(run_list) if run_list else list()
-        self.stats = stats
-        self.cached_bits = bits
-        self.max_value = max_value
-        self.prev_avg = prev_avg
-        self.comment = comment
         if self.stats is None:
             self.stats = AvgStdevStats.for_runs(self.run_list)
 
-    def __str__(self):
-        """Return string with human readable description of the group.
-
-        :returns: Readable description.
-        :rtype: str
-        """
-        return f"stats={self.stats} bits={self.cached_bits}"
+    @property
+    def bits(self) -> float:
+        """Return overall bit content of the group list.
 
-    def __repr__(self):
-        """Return string executable as Python constructor call.
+        If not cached, compute from stats and cache.
 
-        :returns: Executable constructor call.
-        :rtype: str
+        :returns: The overall information content in bits.
+        :rtype: float
         """
-        return (
-            f"BitCountingGroup(run_list={self.run_list!r},stats={self.stats!r}"
-            f",bits={self.cached_bits!r},max_value={self.max_value!r}"
-            f",prev_avg={self.prev_avg!r},comment={self.comment!r})"
-        )
+        if self.cached_bits is None:
+            self.cached_bits = BitCountingStats.for_runs(
+                [self.stats], self.max_value, self.prev_avg
+            ).bits
+        return self.cached_bits
 
-    def __getitem__(self, index):
+    def __getitem__(self, index: int) -> typing.Union[float, AvgStdevStats]:
         """Return the run at the index.
 
         :param index: Index of the run to return.
         :type index: int
         :returns: The run at the index.
-        :rtype: Run
+        :rtype: typing.Union[float, AvgStdevStats]
         """
         return self.run_list[index]
 
-    def __len__(self):
+    def __len__(self) -> int:
         """Return the number of runs in the group.
 
         :returns: The Length of run_list.
@@ -113,39 +99,35 @@ class BitCountingGroup:
         """
         return len(self.run_list)
 
-    def copy(self):
+    def copy(self) -> "BitCountingGroup":
         """Return a new instance with copied internal state.
 
+        Stats are preserved to avoid re-computation.
+        As both float and AvgStdevStats are effectively immutable,
+        only a shallow copy of the runs list is performed.
+
         :returns: The copied instance.
         :rtype: BitCountingGroup
         """
         stats = AvgStdevStats.for_runs([self.stats])
         return self.__class__(
-            run_list=self.run_list, stats=stats, bits=self.cached_bits,
-            max_value=self.max_value, prev_avg=self.prev_avg,
-            comment=self.comment)
-
-    @property
-    def bits(self):
-        """Return overall bit content of the group list.
-
-        If not cached, compute from stats and cache.
-
-        :returns: The overall information content in bits.
-        :rtype: float
-        """
-        if self.cached_bits is None:
-            self.cached_bits = BitCountingStats.for_runs(
-                [self.stats], self.max_value, self.prev_avg).bits
-        return self.cached_bits
+            run_list=list(self.run_list),
+            stats=stats,
+            cached_bits=self.cached_bits,
+            max_value=self.max_value,
+            prev_avg=self.prev_avg,
+            comment=self.comment,
+        )
 
-    def append(self, run):
+    def append(
+        self, run: typing.Union[float, AvgStdevStats]
+    ) -> "BitCountingGroup":
         """Mutate to add the new run, return self.
 
         Stats are updated, but old bits value is deleted from cache.
 
         :param run: The run value to add to the group.
-        :type value: Run
+        :type value: typing.Union[float, AvgStdevStats]
         :returns: The updated self.
         :rtype: BitCountingGroup
         """
@@ -154,7 +136,9 @@ class BitCountingGroup:
         self.cached_bits = None
         return self
 
-    def extend(self, runs):
+    def extend(
+        self, runs: typing.Iterable[typing.Union[float, AvgStdevStats]]
+    ) -> "BitCountingGroup":
         """Mutate to add the new runs, return self.
 
         This is saves small amount of computation
@@ -163,7 +147,7 @@ class BitCountingGroup:
         Stats are updated, but old bits value is deleted from cache.
 
         :param runs: The runs to add to the group.
-        :type value: Iterable[Run]
+        :type value: typing.Iterable[typing.Union[float, AvgStdevStats]]
         :returns: The updated self.
         :rtype: BitCountingGroup
         """
diff --git a/resources/libraries/python/jumpavg/BitCountingGroupList.py b/resources/libraries/python/jumpavg/BitCountingGroupList.py
index 6a1c86baf2..468e79b236 100644
--- a/resources/libraries/python/jumpavg/BitCountingGroupList.py
+++ b/resources/libraries/python/jumpavg/BitCountingGroupList.py
@@ -13,13 +13,16 @@
 
 """Module holding BitCountingGroupList class."""
 
-import copy
+import collections
+import dataclasses
+import typing
 
+from .AvgStdevStats import AvgStdevStats  # Just for type hints.
 from .BitCountingGroup import BitCountingGroup
 
 
-class BitCountingGroupList:
-    # TODO: Inherit from collections.abc.Sequence in Python 3.
+@dataclasses.dataclass
+class BitCountingGroupList(collections.abc.Sequence):
     """List of data groups which tracks overall bit count.
 
     The Sequence-like access is related to the list of groups,
@@ -41,55 +44,27 @@ class BitCountingGroupList:
     recalculations if the bit count is not needed.
     """
 
-    def __init__(self, group_list=None, bits_except_last=0.0, max_value=None):
-        """Set the internal state without any calculations.
-
-        The group list argument is copied deeply, so it is not a problem
-        if the value object is mutated afterwards.
+    max_value: float
+    """Maximal sample value to base bits computation on."""
+    group_list: typing.List[BitCountingGroup] = None
+    """List of groups to compose this group list.
+    Init also accepts None standing for an empty list.
+    This class takes ownership of the list,
+    so caller of init should clone their copy to avoid unexpected mutations.
+    """
+    bits_except_last: float = 0.0
+    """Partial sum of all but one group bits."""
 
-        A "group" stands for an Iterable of runs, where "run" is either
-        a float value, or a stats-like object (only size, avg and stdev
-        are accessed). Run is a hypothetical abstract class,
-        defining it in Python 2 is too much hassle.
+    def __post_init__(self):
+        """Turn possible None into an empty list.
 
         It is not verified whether the user provided values are valid,
-        e.g. whether the cached bits values make sense.
-
-        The max_value is required and immutable,
-        it is recommended the callers find their maximum beforehand.
-
-        :param group_list: List of groups to compose this group list (or empty).
-        :param bits_except_last: Partial sum of all but one group bits.
-        :param max_value: Maximal sample value to base bits computation on.
-        :type group_list: Iterable[BitCountingGroup]
-        :type bits_except_last: float
-        :type max_value: float
-        """
-        self.group_list = copy.deepcopy(group_list) if group_list else list()
-        self.bits_except_last = bits_except_last
-        self.max_value = max_value
-
-    def __str__(self):
-        """Return string with human readable description of the group list.
-
-        :returns: Readable description.
-        :rtype: str
+        e.g. whether the cached bits values (and bits_except_last) make sense.
         """
-        return f"group_list={self.group_list} bits={self.bits}"
-
-    def __repr__(self):
-        """Return string executable as Python constructor call.
+        if self.group_list is None:
+            self.group_list = list()
 
-        :returns: Executable constructor call.
-        :rtype: str
-        """
-        return (
-            f"BitCountingGroupList(group_list={self.group_list!r}"
-            f",bits_except_last={self.bits_except_last!r}"
-            f",max_value={self.max_value!r})"
-        )
-
-    def __getitem__(self, index):
+    def __getitem__(self, index: int) -> BitCountingGroup:
         """Return the group at the index.
 
         :param index: Index of the group to return.
@@ -99,7 +74,7 @@ class BitCountingGroupList:
         """
         return self.group_list[index]
 
-    def __len__(self):
+    def __len__(self) -> int:
         """Return the length of the group list.
 
         :returns: The Length of group_list.
@@ -107,19 +82,44 @@ class BitCountingGroupList:
         """
         return len(self.group_list)
 
-    def copy(self):
+    def copy(self) -> "BitCountingGroupList":
         """Return a new instance with copied internal state.
 
         :returns: The copied instance.
         :rtype: BitCountingGroupList
         """
         return self.__class__(
-            group_list=self.group_list, bits_except_last=self.bits_except_last,
-            max_value=self.max_value
+            max_value=self.max_value,
+            group_list=[group.copy() for group in self.group_list],
+            bits_except_last=self.bits_except_last,
+        )
+
+    def copy_fast(self) -> "BitCountingGroupList":
+        """Return a new instance with minimally copied internal state.
+
+        The assumption here is that only the last group will ever be mutated
+        (in self, probably never in the return value),
+        so all the previous groups can be "copied by reference".
+
+        :returns: The copied instance.
+        :rtype: BitCountingGroupList
+        """
+        group_list = list(self.group_list)
+        if group_list:
+            group_list[-1] = group_list[-1].copy()
+            # Further speedup is possible by keeping the last group
+            # as a singly linked (from end) list,
+            # but for CSIT sample sizes, copy of whole Python list is faster.
+            # TODO: Implement linked list as an option
+            # for users with many samples.
+        return self.__class__(
+            max_value=self.max_value,
+            group_list=group_list,
+            bits_except_last=self.bits_except_last,
         )
 
     @property
-    def bits(self):
+    def bits(self) -> float:
         """Return overall bit content of the group list.
 
         :returns: The overall information content in bits.
@@ -130,12 +130,17 @@ class BitCountingGroupList:
         # TODO: Is it worth to cache the overall result?
         return self.bits_except_last + self.group_list[-1].bits
 
-    def append_group_of_runs(self, runs):
+    def append_group_of_runs(
+        self,
+        runs: typing.Union[
+            BitCountingGroup, typing.List[typing.Union[float, AvgStdevStats]]
+        ],
+    ) -> "BitCountingGroupList":
         """Mutate to add a new group based on the runs, return self.
 
-        The argument is copied before adding to the group list,
-        so further edits do not affect the grup list.
-        The argument can also be a group, only runs from it are used.
+        The list argument is NOT copied before adding to the group list,
+        so further edits MAY affect the group list.
+        The list from BitCountingGroup is shallow copied though.
 
         :param runs: Runs to form the next group to be appended to self.
         :type runs: Union[Iterable[Run], BitCountingGroup]
@@ -151,12 +156,15 @@ class BitCountingGroupList:
             new_group.cached_bits = None
         else:
             new_group = BitCountingGroup(
-                run_list=runs, max_value=self.max_value, prev_avg=prev_avg)
+                run_list=runs, max_value=self.max_value, prev_avg=prev_avg
+            )
         self.bits_except_last = self.bits
         self.group_list.append(new_group)
         return self
 
-    def append_run_to_to_last_group(self, run):
+    def append_run_to_to_last_group(
+        self, run: typing.Union[float, AvgStdevStats]
+    ) -> "BitCountingGroupList":
         """Mutate to add new run at the end of the last group.
 
         Basically a one-liner, only returning group list instead of last group.
@@ -170,7 +178,9 @@ class BitCountingGroupList:
         self.group_list[-1].append(run)
         return self
 
-    def extend_runs_to_last_group(self, runs):
+    def extend_runs_to_last_group(
+        self, runs: typing.Iterable[typing.Union[float, AvgStdevStats]]
+    ) -> "BitCountingGroupList":
         """Mutate to add new runs to the end of the last group.
 
         A faster alternative to appending runs one by one in a loop.
diff --git a/resources/libraries/python/jumpavg/BitCountingStats.py b/resources/libraries/python/jumpavg/BitCountingStats.py
index 7b5e659214..524ac952c8 100644
--- a/resources/libraries/python/jumpavg/BitCountingStats.py
+++ b/resources/libraries/python/jumpavg/BitCountingStats.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021 Cisco and/or its affiliates.
+# Copyright (c) 2022 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -13,11 +13,14 @@
 
 """Module holding BitCountingStats class."""
 
+import dataclasses
 import math
+import typing
 
 from .AvgStdevStats import AvgStdevStats
 
 
+@dataclasses.dataclass
 class BitCountingStats(AvgStdevStats):
     """Class for statistics which include information content of a group.
 
@@ -33,11 +36,20 @@ class BitCountingStats(AvgStdevStats):
     Only for_runs method calls the parent implementation, without using super().
     """
 
-    def __init__(
-            self, size=0, avg=None, stdev=0.0, max_value=None, prev_avg=None):
-        """Construct the stats object by computing from the values needed.
+    max_value: float = None
+    """Maximal sample value (real or estimated).
+    Default value is there just for argument ordering reasons,
+    leaving None leads to exceptions."""
+    prev_avg: typing.Optional[float] = None
+    """Population average of the previous group (if any)."""
+    bits: float = None
+    """The computed information content of the group.
+    It is formally an argument to init function, just to keep repr string
+    a valid call. But the init value is ignored and always recomputed.
+    """
 
-        The values are not sanitized, faulty callers can cause math errors.
+    def __post_init__(self):
+        """Construct the stats object by computing from the values needed.
 
         The None values are allowed for stats for zero size data,
         but such stats can report arbitrary avg and max_value.
@@ -54,91 +66,54 @@ class BitCountingStats(AvgStdevStats):
         (but not with floating point mechanic).
         The hope is the difference will have
         no real impact on the classification procedure.
-
-        :param size: Number of values participating in this group.
-        :param avg: Population average of the participating sample values.
-        :param stdev: Population standard deviation of the sample values.
-        :param max_value: Maximal expected value.
-            TODO: This might be more optimal,
-            but max-invariant algorithm will be nicer.
-        :param prev_avg: Population average of the previous group.
-            If None, no previous average is taken into account.
-            If not None, the given previous average is used to discourage
-            consecutive groups with similar averages
-            (opposite triangle distribution is assumed).
-        :type avg: float
-        :type size: int
-        :type stdev: float
-        :type max_value: Union[float, NoneType]
-        :type prev_avg: Union[float, NoneType]
         """
-        self.avg = avg
-        self.size = size
-        self.stdev = stdev
-        self.max_value = max_value
-        self.prev_avg = prev_avg
         # Zero size should in principle have non-zero bits (coding zero size),
         # but zero allows users to add empty groups without affecting bits.
         self.bits = 0.0
         if self.size < 1:
             return
-        if avg is None:
-            raise ValueError(f"Avg is None: {self!r}")
-        if max_value is None or max_value <= 0.0:
+        if self.max_value <= 0.0:
             raise ValueError(f"Invalid max value: {self!r}")
         # Length of the sequence must be also counted in bits,
         # otherwise the message would not be decodable.
         # Model: probability of k samples is 1/k - 1/(k+1) == 1/k/(k+1)
         # This is compatible with zero size leading to zero bits.
-        self.bits += math.log(size * (size + 1), 2)
-        if prev_avg is None:
+        self.bits += math.log(self.size * (self.size + 1), 2)
+        if self.prev_avg is None:
             # Avg is considered to be uniformly distributed
             # from zero to max_value.
-            self.bits += math.log(max_value + 1.0, 2)
+            self.bits += math.log(self.max_value + 1.0, 2)
         else:
             # Opposite triangle distribution with minimum.
             self.bits += math.log(
-                max_value * (max_value + 1) / (abs(avg - prev_avg) + 1), 2)
+                (self.max_value * (self.max_value + 1))
+                / (abs(self.avg - self.prev_avg) + 1),
+                2,
+            )
         if self.size < 2:
             return
         # Stdev is considered to be uniformly distributed
         # from zero to max_value. That is quite a bad expectation,
         # but resilient to negative samples etc.
-        self.bits += math.log(max_value + 1.0, 2)
+        self.bits += math.log(self.max_value + 1.0, 2)
         # Now we know the samples lie on sphere in size-1 dimensions.
         # So it is (size-2)-sphere, with radius^2 == stdev^2 * size.
         # https://en.wikipedia.org/wiki/N-sphere
-        sphere_area_ln = math.log(2) + math.log(math.pi) * ((size - 1) / 2.0)
-        sphere_area_ln -= math.lgamma((size - 1) / 2.0)
-        sphere_area_ln += math.log(stdev + 1.0) * (size - 2)
-        sphere_area_ln += math.log(size) * ((size - 2) / 2.0)
+        sphere_area_ln = math.log(2)
+        sphere_area_ln += math.log(math.pi) * ((self.size - 1) / 2.0)
+        sphere_area_ln -= math.lgamma((self.size - 1) / 2.0)
+        sphere_area_ln += math.log(self.stdev + 1.0) * (self.size - 2)
+        sphere_area_ln += math.log(self.size) * ((self.size - 2) / 2.0)
         self.bits += sphere_area_ln / math.log(2)
 
-    def __str__(self):
-        """Return string with human readable description of the group.
-
-        :returns: Readable description.
-        :rtype: str
-        """
-        return (
-            f"size={self.size} avg={self.avg} stdev={self.stdev}"
-            f" bits={self.bits}"
-        )
-
-    def __repr__(self):
-        """Return string executable as Python constructor call.
-
-        :returns: Executable constructor call.
-        :rtype: str
-        """
-        return (
-            f"BitCountingStats(size={self.size!r},avg={self.avg!r}"
-            f",stdev={self.stdev!r},max_value={self.max_value!r}"
-            f",prev_avg={self.prev_avg!r})"
-        )
-
+    # TODO: Rename, so pylint stops complaining about signature change.
     @classmethod
-    def for_runs(cls, runs, max_value=None, prev_avg=None):
+    def for_runs(
+        cls,
+        runs: typing.Iterable[typing.Union[float, AvgStdevStats]],
+        max_value: float,
+        prev_avg: typing.Optional[float] = None,
+    ):
         """Return new stats instance describing the sequence of runs.
 
         If you want to append data to existing stats object,
@@ -164,6 +139,11 @@ class BitCountingStats(AvgStdevStats):
         :rtype: cls
         """
         asd = AvgStdevStats.for_runs(runs)
-        ret_obj = cls(size=asd.size, avg=asd.avg, stdev=asd.stdev,
-                      max_value=max_value, prev_avg=prev_avg)
+        ret_obj = cls(
+            size=asd.size,
+            avg=asd.avg,
+            stdev=asd.stdev,
+            max_value=max_value,
+            prev_avg=prev_avg,
+        )
         return ret_obj
diff --git a/resources/libraries/python/jumpavg/classify.py b/resources/libraries/python/jumpavg/classify.py
index 252c71e8d5..87d2502037 100644
--- a/resources/libraries/python/jumpavg/classify.py
+++ b/resources/libraries/python/jumpavg/classify.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021 Cisco and/or its affiliates.
+# Copyright (c) 2022 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -20,15 +20,20 @@ for grouping results into the list of groups,
 assuming each group is a population of different Gaussian distribution.
 """
 
+import typing
+
 from .AvgStdevStats import AvgStdevStats
 from .BitCountingGroupList import BitCountingGroupList
 
 
-def classify(values):
+def classify(
+    values: typing.Iterable[typing.Union[float, typing.Iterable[float]]]
+) -> BitCountingGroupList:
     """Return the values in groups of optimal bit count.
 
     Here, a value is either a float, or an iterable of floats.
     Such iterables represent an undivisible sequence of floats.
+    Int is accepted anywhere instead of float.
 
     Internally, such sequence is replaced by AvgStdevStats
     after maximal value is found.
@@ -50,27 +55,24 @@ def classify(values):
                 if subvalue > max_value:
                     max_value = subvalue
             processed_values.append(AvgStdevStats.for_runs(value))
-    open_at = list()
-    closed_before = [BitCountingGroupList(max_value=max_value)]
-    for index, value in enumerate(processed_values):
-        newly_open = closed_before[index].copy()
-        newly_open.append_group_of_runs([value])
-        open_at.append(newly_open)
-        record_group_list = newly_open
-        for previous_index, old_open in enumerate(open_at[:index]):
-            new_open = old_open.copy().append_run_to_to_last_group(value)
-            open_at[previous_index] = new_open
-            if new_open.bits < record_group_list.bits:
-                record_group_list = new_open
-        closed_before.append(record_group_list)
-    partition = closed_before[-1]
-    previous_average = partition[0].stats.avg
-    for group in partition:
+    # Glist means group list (BitCountingGroupList).
+    open_glists = list()
+    record_glist = BitCountingGroupList(max_value=max_value)
+    for value in processed_values:
+        new_open_glist = record_glist.copy_fast().append_group_of_runs([value])
+        record_glist = new_open_glist
+        for old_open_glist in open_glists:
+            old_open_glist.append_run_to_to_last_group(value)
+            if old_open_glist.bits < record_glist.bits:
+                record_glist = old_open_glist
+        open_glists.append(new_open_glist)
+    previous_average = record_glist[0].stats.avg
+    for group in record_glist:
         if group.stats.avg == previous_average:
-            group.comment = u"normal"
+            group.comment = "normal"
         elif group.stats.avg < previous_average:
-            group.comment = u"regression"
+            group.comment = "regression"
         elif group.stats.avg > previous_average:
-            group.comment = u"progression"
+            group.comment = "progression"
         previous_average = group.stats.avg
-    return partition
+    return record_glist
author	Vratko Polak <vrpolak@cisco.com>	2022-08-09 14:56:15 +0200
committer	Tibor Frank <tifrank@cisco.com>	2022-08-15 10:58:57 +0000
commit	4bfbd4d72ad53eb1694868c19640c8b4a17d32cb (patch)
tree	0a566caa3a9ce141f8045bf22c395833355f3a7c
parent	c1b770bc71eda83468c0e2a97c851b831b76641b (diff)