diff options
author | Vratko Polak <vrpolak@cisco.com> | 2022-08-09 14:56:15 +0200 |
---|---|---|
committer | Tibor Frank <tifrank@cisco.com> | 2022-08-15 10:58:57 +0000 |
commit | 4bfbd4d72ad53eb1694868c19640c8b4a17d32cb (patch) | |
tree | 0a566caa3a9ce141f8045bf22c395833355f3a7c | |
parent | c1b770bc71eda83468c0e2a97c851b831b76641b (diff) |
feat(jumpavg): speed up, use Python 3.8 features
+ The main speedup comes from abandoning copy.deepcopy(),
doing shallow list copies (at most) and introducing copy_fast().
+ Turn into dataclasses whenever possible, use type hints.
+ Simplify the partition search code.
+ It is now clearer that the time complexity is O(N*(N+n)),
where N is the number of samples, and n is the average size
of the last group of the current record glist.
+ Used black for formatting, so no u"" anymore.
+ Update metadata for 0.3.0 release.
Change-Id: I302203b4d42aeb22be1128e2fe72353a44eae5d0
Signed-off-by: Vratko Polak <vrpolak@cisco.com>
-rw-r--r-- | PyPI/jumpavg/README.rst | 5 | ||||
-rw-r--r-- | PyPI/jumpavg/setup.py | 6 | ||||
-rw-r--r-- | resources/libraries/python/jumpavg/AvgStdevStats.py | 56 | ||||
-rw-r--r-- | resources/libraries/python/jumpavg/BitCountingGroup.py | 134 | ||||
-rw-r--r-- | resources/libraries/python/jumpavg/BitCountingGroupList.py | 128 | ||||
-rw-r--r-- | resources/libraries/python/jumpavg/BitCountingStats.py | 110 | ||||
-rw-r--r-- | resources/libraries/python/jumpavg/classify.py | 46 |
7 files changed, 221 insertions, 264 deletions
diff --git a/PyPI/jumpavg/README.rst b/PyPI/jumpavg/README.rst index 3161988f69..f66ea9d39b 100644 --- a/PyPI/jumpavg/README.rst +++ b/PyPI/jumpavg/README.rst @@ -25,6 +25,11 @@ Change log TODO: Move into separate file? +0.3.0: Considerable speedup by avoiding unneeded copy. Dataclasses used. + Mostly API compatible, but repr looks different. + +0.2.0: API incompatible changes. Targeted to Python 3 now. + 0.1.3: Changed stdev computation to avoid negative variance due to rounding errors. 0.1.2: First version published in PyPI. diff --git a/PyPI/jumpavg/setup.py b/PyPI/jumpavg/setup.py index 28ddfcf0b1..129277a26e 100644 --- a/PyPI/jumpavg/setup.py +++ b/PyPI/jumpavg/setup.py @@ -16,7 +16,7 @@ with open(path.join(here, u"README.rst"), encoding=u"utf-8") as f: setup( name=u"jumpavg", - version=u"0.2.0", # This is currently the only place listing the version. + version=u"0.3.0", # This is currently the only place listing the version. description=( u"Library for locating changes in time series by grouping results." ), @@ -36,12 +36,12 @@ setup( u"License :: OSI Approved :: Apache Software License", u"Natural Language :: English", # TODO: Test which Python versions is the code compatible with. - u"Programming Language :: Python :: 2.7", + u"Programming Language :: Python :: 3.8", u"Topic :: Scientific/Engineering :: Information Analysis" ], keywords=u"progression regression anomaly detection statistics bits", packages=find_packages(exclude=[]), - python_requires="~=3.6", + python_requires="~=3.8", install_requires=[], # TODO: Include simulator and tests. extras_require={ diff --git a/resources/libraries/python/jumpavg/AvgStdevStats.py b/resources/libraries/python/jumpavg/AvgStdevStats.py index 4720c10f3d..d40b316bf1 100644 --- a/resources/libraries/python/jumpavg/AvgStdevStats.py +++ b/resources/libraries/python/jumpavg/AvgStdevStats.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 Cisco and/or its affiliates. +# Copyright (c) 2022 Cisco and/or its affiliates. 
# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: @@ -13,9 +13,12 @@ """Module holding AvgStdevStats class.""" +import dataclasses import math +import typing +@dataclasses.dataclass class AvgStdevStats: """Class for statistics which include average and stdev of a group. @@ -25,45 +28,18 @@ class AvgStdevStats: Instances are only statistics, the data itself is stored elsewhere. """ - def __init__(self, size=0, avg=0.0, stdev=0.0): - """Construct the stats object by storing the values needed. - - Each value has to be numeric. - The values are not sanitized depending on size, wrong initialization - can cause delayed math errors. - - :param size: Number of values participating in this group. - :param avg: Population average of the participating sample values. - :param stdev: Population standard deviation of the sample values. - :type size: int - :type avg: float - :type stdev: float - """ - self.size = size - self.avg = avg - self.stdev = stdev - - def __str__(self): - """Return string with human readable description of the group. - - :returns: Readable description. - :rtype: str - """ - return f"size={self.size} avg={self.avg} stdev={self.stdev}" - - def __repr__(self): - """Return string executable as Python constructor call. - - :returns: Executable constructor call. - :rtype: str - """ - return ( - f"AvgStdevStats(size={self.size!r},avg={self.avg!r}" - f",stdev={self.stdev!r})" - ) + size: int = 0 + """Number of scalar values (samples) participating in this group.""" + avg: float = 0.0 + """Population average of the participating sample values.""" + stdev: float = 0.0 + """Population standard deviation of the sample values.""" @classmethod - def for_runs(cls, runs): + def for_runs( + cls, + runs: typing.Iterable[typing.Union[float, "AvgStdevStats"]], + ) -> "AvgStdevStats": """Return new stats instance describing the sequence of runs. 
If you want to append data to existing stats object, @@ -72,8 +48,8 @@ class AvgStdevStats: Instead of a verb, "for" is used to start this method name, to signify the result contains less information than the input data. - Here, Run is a hypothetical abstract class, an union of float and cls. - Defining that as a real abstract class in Python 2 is too much hassle. + Here, run is a hypothetical abstract class, an union of float and cls. + Defining that as a real abstract class in Python is too much hassle. :param runs: Sequence of data to describe by the new metadata. :type runs: Iterable[Union[float, cls]] diff --git a/resources/libraries/python/jumpavg/BitCountingGroup.py b/resources/libraries/python/jumpavg/BitCountingGroup.py index f1bdc502fd..48bea086f4 100644 --- a/resources/libraries/python/jumpavg/BitCountingGroup.py +++ b/resources/libraries/python/jumpavg/BitCountingGroup.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 Cisco and/or its affiliates. +# Copyright (c) 2022 Cisco and/or its affiliates. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: @@ -13,14 +13,16 @@ """Module holding BitCountingGroup class.""" -import copy +import collections +import dataclasses +import typing from .AvgStdevStats import AvgStdevStats from .BitCountingStats import BitCountingStats -class BitCountingGroup: - # TODO: Inherit from collections.abc.Sequence in Python 3. +@dataclasses.dataclass +class BitCountingGroup(collections.abc.Sequence): """Group of runs which tracks bit count in an efficient manner. This class contains methods that mutate the internal state, @@ -38,74 +40,58 @@ class BitCountingGroup: a method to add a single run in an efficient manner is provided. """ - def __init__(self, run_list=None, stats=None, bits=None, - max_value=None, prev_avg=None, comment="unknown"): - """Set the internal state and partially the stats. 
- - A "group" stands for an Iterable of runs, where "run" is either - a float value, or a stats-like object (only size, avg and stdev - are accessed). Run is a hypothetical abstract class, - defining it in Python 2 is too much hassle. - - Only a copy of the run list argument value is stored in the instance, - so it is not a problem if the value object is mutated afterwards. + run_list: typing.List[typing.Union[float, AvgStdevStats]] + """List of run to compose into this group. + The init call takes ownership of the list, + so the caller should clone it to avoid unexpected muations.""" + max_value: float + """Maximal sample value to expect.""" + comment: str = "unknown" + """Any string giving more info, e.g. "regression".""" + prev_avg: typing.Optional[float] = None + """Average of the previous group, if any.""" + stats: AvgStdevStats = None + """Stats object used for computing bits. + Almost always recomputed, except when non-None in init.""" + cached_bits: typing.Optional[float] = None + """Cached value of information content. + Noned on edit, recomputed if needed and None.""" + + def __post_init__(self): + """Recompute stats is None. It is not verified whether the user provided values are valid, e.g. whether the stats and bits values reflect the runs. - - :param run_list: List of run to compose into this group. Default: empty. - :param stats: Stats object used for computing bits. - :param bits: Cached value of information content. - :param max_value: Maximal sample value to be used for computing. - :param prev_avg: Average of the previous group, affects bits. - :param comment: Any string giving more info, e.g. "regression". 
- :type run_list: Iterable[Run] - :type stats: Optional[AvgStdevStats] - :type bits: Optional[float] - :type max_value: float - :type prev_avg: Optional[float] - :type comment: str """ - self.run_list = copy.deepcopy(run_list) if run_list else list() - self.stats = stats - self.cached_bits = bits - self.max_value = max_value - self.prev_avg = prev_avg - self.comment = comment if self.stats is None: self.stats = AvgStdevStats.for_runs(self.run_list) - def __str__(self): - """Return string with human readable description of the group. - - :returns: Readable description. - :rtype: str - """ - return f"stats={self.stats} bits={self.cached_bits}" + @property + def bits(self) -> float: + """Return overall bit content of the group list. - def __repr__(self): - """Return string executable as Python constructor call. + If not cached, compute from stats and cache. - :returns: Executable constructor call. - :rtype: str + :returns: The overall information content in bits. + :rtype: float """ - return ( - f"BitCountingGroup(run_list={self.run_list!r},stats={self.stats!r}" - f",bits={self.cached_bits!r},max_value={self.max_value!r}" - f",prev_avg={self.prev_avg!r},comment={self.comment!r})" - ) + if self.cached_bits is None: + self.cached_bits = BitCountingStats.for_runs( + [self.stats], self.max_value, self.prev_avg + ).bits + return self.cached_bits - def __getitem__(self, index): + def __getitem__(self, index: int) -> typing.Union[float, AvgStdevStats]: """Return the run at the index. :param index: Index of the run to return. :type index: int :returns: The run at the index. - :rtype: Run + :rtype: typing.Union[float, AvgStdevStats] """ return self.run_list[index] - def __len__(self): + def __len__(self) -> int: """Return the number of runs in the group. :returns: The Length of run_list. @@ -113,39 +99,35 @@ class BitCountingGroup: """ return len(self.run_list) - def copy(self): + def copy(self) -> "BitCountingGroup": """Return a new instance with copied internal state. 
+ Stats are preserved to avoid re-computation. + As both float and AvgStdevStats are effectively immutable, + only a shallow copy of the runs list is performed. + :returns: The copied instance. :rtype: BitCountingGroup """ stats = AvgStdevStats.for_runs([self.stats]) return self.__class__( - run_list=self.run_list, stats=stats, bits=self.cached_bits, - max_value=self.max_value, prev_avg=self.prev_avg, - comment=self.comment) - - @property - def bits(self): - """Return overall bit content of the group list. - - If not cached, compute from stats and cache. - - :returns: The overall information content in bits. - :rtype: float - """ - if self.cached_bits is None: - self.cached_bits = BitCountingStats.for_runs( - [self.stats], self.max_value, self.prev_avg).bits - return self.cached_bits + run_list=list(self.run_list), + stats=stats, + cached_bits=self.cached_bits, + max_value=self.max_value, + prev_avg=self.prev_avg, + comment=self.comment, + ) - def append(self, run): + def append( + self, run: typing.Union[float, AvgStdevStats] + ) -> "BitCountingGroup": """Mutate to add the new run, return self. Stats are updated, but old bits value is deleted from cache. :param run: The run value to add to the group. - :type value: Run + :type value: typing.Union[float, AvgStdevStats] :returns: The updated self. :rtype: BitCountingGroup """ @@ -154,7 +136,9 @@ class BitCountingGroup: self.cached_bits = None return self - def extend(self, runs): + def extend( + self, runs: typing.Iterable[typing.Union[float, AvgStdevStats]] + ) -> "BitCountingGroup": """Mutate to add the new runs, return self. This is saves small amount of computation @@ -163,7 +147,7 @@ class BitCountingGroup: Stats are updated, but old bits value is deleted from cache. :param runs: The runs to add to the group. - :type value: Iterable[Run] + :type value: typing.Iterable[typing.Union[float, AvgStdevStats]] :returns: The updated self. 
:rtype: BitCountingGroup """ diff --git a/resources/libraries/python/jumpavg/BitCountingGroupList.py b/resources/libraries/python/jumpavg/BitCountingGroupList.py index 6a1c86baf2..468e79b236 100644 --- a/resources/libraries/python/jumpavg/BitCountingGroupList.py +++ b/resources/libraries/python/jumpavg/BitCountingGroupList.py @@ -13,13 +13,16 @@ """Module holding BitCountingGroupList class.""" -import copy +import collections +import dataclasses +import typing +from .AvgStdevStats import AvgStdevStats # Just for type hints. from .BitCountingGroup import BitCountingGroup -class BitCountingGroupList: - # TODO: Inherit from collections.abc.Sequence in Python 3. +@dataclasses.dataclass +class BitCountingGroupList(collections.abc.Sequence): """List of data groups which tracks overall bit count. The Sequence-like access is related to the list of groups, @@ -41,55 +44,27 @@ class BitCountingGroupList: recalculations if the bit count is not needed. """ - def __init__(self, group_list=None, bits_except_last=0.0, max_value=None): - """Set the internal state without any calculations. - - The group list argument is copied deeply, so it is not a problem - if the value object is mutated afterwards. + max_value: float + """Maximal sample value to base bits computation on.""" + group_list: typing.List[BitCountingGroup] = None + """List of groups to compose this group list. + Init also accepts None standing for an empty list. + This class takes ownership of the list, + so caller of init should clone their copy to avoid unexpected mutations. + """ + bits_except_last: float = 0.0 + """Partial sum of all but one group bits.""" - A "group" stands for an Iterable of runs, where "run" is either - a float value, or a stats-like object (only size, avg and stdev - are accessed). Run is a hypothetical abstract class, - defining it in Python 2 is too much hassle. + def __post_init__(self): + """Turn possible None into an empty list. 
It is not verified whether the user provided values are valid, - e.g. whether the cached bits values make sense. - - The max_value is required and immutable, - it is recommended the callers find their maximum beforehand. - - :param group_list: List of groups to compose this group list (or empty). - :param bits_except_last: Partial sum of all but one group bits. - :param max_value: Maximal sample value to base bits computation on. - :type group_list: Iterable[BitCountingGroup] - :type bits_except_last: float - :type max_value: float - """ - self.group_list = copy.deepcopy(group_list) if group_list else list() - self.bits_except_last = bits_except_last - self.max_value = max_value - - def __str__(self): - """Return string with human readable description of the group list. - - :returns: Readable description. - :rtype: str + e.g. whether the cached bits values (and bits_except_last) make sense. """ - return f"group_list={self.group_list} bits={self.bits}" - - def __repr__(self): - """Return string executable as Python constructor call. + if self.group_list is None: + self.group_list = list() - :returns: Executable constructor call. - :rtype: str - """ - return ( - f"BitCountingGroupList(group_list={self.group_list!r}" - f",bits_except_last={self.bits_except_last!r}" - f",max_value={self.max_value!r})" - ) - - def __getitem__(self, index): + def __getitem__(self, index: int) -> BitCountingGroup: """Return the group at the index. :param index: Index of the group to return. @@ -99,7 +74,7 @@ class BitCountingGroupList: """ return self.group_list[index] - def __len__(self): + def __len__(self) -> int: """Return the length of the group list. :returns: The Length of group_list. @@ -107,19 +82,44 @@ class BitCountingGroupList: """ return len(self.group_list) - def copy(self): + def copy(self) -> "BitCountingGroupList": """Return a new instance with copied internal state. :returns: The copied instance. 
:rtype: BitCountingGroupList """ return self.__class__( - group_list=self.group_list, bits_except_last=self.bits_except_last, - max_value=self.max_value + max_value=self.max_value, + group_list=[group.copy() for group in self.group_list], + bits_except_last=self.bits_except_last, + ) + + def copy_fast(self) -> "BitCountingGroupList": + """Return a new instance with minimaly copied internal state. + + The assumption here is that only the last group will ever be mutated + (in self, probably never in the return value), + so all the previous groups can be "copied by reference". + + :returns: The copied instance. + :rtype: BitCountingGroupList + """ + group_list = list(self.group_list) + if group_list: + group_list[-1] = group_list[-1].copy() + # Further speedup is possible by keeping the last group + # as a singly linked (from end) list, + # but for CSIT sample sizes, copy of whole Python list is faster. + # TODO: Implement linked list as an option + # for users with many samples. + return self.__class__( + max_value=self.max_value, + group_list=group_list, + bits_except_last=self.bits_except_last, ) @property - def bits(self): + def bits(self) -> float: """Return overall bit content of the group list. :returns: The overall information content in bits. @@ -130,12 +130,17 @@ class BitCountingGroupList: # TODO: Is it worth to cache the overall result? return self.bits_except_last + self.group_list[-1].bits - def append_group_of_runs(self, runs): + def append_group_of_runs( + self, + runs: typing.Union[ + BitCountingGroup, typing.List[typing.Union[float, AvgStdevStats]] + ], + ) -> "BitCountingGroupList": """Mutate to add a new group based on the runs, return self. - The argument is copied before adding to the group list, - so further edits do not affect the grup list. - The argument can also be a group, only runs from it are used. + The list argument is NOT copied before adding to the group list, + so further edits MAY not affect the grup list. 
+ The list from BitCountingGroup is shallow copied though. :param runs: Runs to form the next group to be appended to self. :type runs: Union[Iterable[Run], BitCountingGroup] @@ -151,12 +156,15 @@ class BitCountingGroupList: new_group.cached_bits = None else: new_group = BitCountingGroup( - run_list=runs, max_value=self.max_value, prev_avg=prev_avg) + run_list=runs, max_value=self.max_value, prev_avg=prev_avg + ) self.bits_except_last = self.bits self.group_list.append(new_group) return self - def append_run_to_to_last_group(self, run): + def append_run_to_to_last_group( + self, run: typing.Union[float, AvgStdevStats] + ) -> "BitCountingGroupList": """Mutate to add new run at the end of the last group. Basically a one-liner, only returning group list instead of last group. @@ -170,7 +178,9 @@ class BitCountingGroupList: self.group_list[-1].append(run) return self - def extend_runs_to_last_group(self, runs): + def extend_runs_to_last_group( + self, runs: typing.Iterable[typing.Union[float, AvgStdevStats]] + ) -> "BitCountingGroupList": """Mutate to add new runs to the end of the last group. A faster alternative to appending runs one by one in a loop. diff --git a/resources/libraries/python/jumpavg/BitCountingStats.py b/resources/libraries/python/jumpavg/BitCountingStats.py index 7b5e659214..524ac952c8 100644 --- a/resources/libraries/python/jumpavg/BitCountingStats.py +++ b/resources/libraries/python/jumpavg/BitCountingStats.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 Cisco and/or its affiliates. +# Copyright (c) 2022 Cisco and/or its affiliates. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at: @@ -13,11 +13,14 @@ """Module holding BitCountingStats class.""" +import dataclasses import math +import typing from .AvgStdevStats import AvgStdevStats +@dataclasses.dataclass class BitCountingStats(AvgStdevStats): """Class for statistics which include information content of a group. @@ -33,11 +36,20 @@ class BitCountingStats(AvgStdevStats): Only for_runs method calls the parent implementation, without using super(). """ - def __init__( - self, size=0, avg=None, stdev=0.0, max_value=None, prev_avg=None): - """Construct the stats object by computing from the values needed. + max_value: float = None + """Maximal sample value (real or estimated). + Default value is there just for argument ordering reasons, + leaving None leads to exceptions.""" + prev_avg: typing.Optional[float] = None + """Population average of the previous group (if any).""" + bits: float = None + """The computed information content of the group. + It is formally an argument to init function, just to keep repr string + a valid call. ut the init value is ignored and always recomputed. + """ - The values are not sanitized, faulty callers can cause math errors. + def __post_init__(self): + """Construct the stats object by computing from the values needed. The None values are allowed for stats for zero size data, but such stats can report arbitrary avg and max_value. @@ -54,91 +66,54 @@ class BitCountingStats(AvgStdevStats): (but not with floating point mechanic). The hope is the difference will have no real impact on the classification procedure. - - :param size: Number of values participating in this group. - :param avg: Population average of the participating sample values. - :param stdev: Population standard deviation of the sample values. - :param max_value: Maximal expected value. - TODO: This might be more optimal, - but max-invariant algorithm will be nicer. - :param prev_avg: Population average of the previous group. 
- If None, no previous average is taken into account. - If not None, the given previous average is used to discourage - consecutive groups with similar averages - (opposite triangle distribution is assumed). - :type avg: float - :type size: int - :type stdev: float - :type max_value: Union[float, NoneType] - :type prev_avg: Union[float, NoneType] """ - self.avg = avg - self.size = size - self.stdev = stdev - self.max_value = max_value - self.prev_avg = prev_avg # Zero size should in principle have non-zero bits (coding zero size), # but zero allows users to add empty groups without affecting bits. self.bits = 0.0 if self.size < 1: return - if avg is None: - raise ValueError(f"Avg is None: {self!r}") - if max_value is None or max_value <= 0.0: + if self.max_value <= 0.0: raise ValueError(f"Invalid max value: {self!r}") # Length of the sequence must be also counted in bits, # otherwise the message would not be decodable. # Model: probability of k samples is 1/k - 1/(k+1) == 1/k/(k+1) # This is compatible with zero size leading to zero bits. - self.bits += math.log(size * (size + 1), 2) - if prev_avg is None: + self.bits += math.log(self.size * (self.size + 1), 2) + if self.prev_avg is None: # Avg is considered to be uniformly distributed # from zero to max_value. - self.bits += math.log(max_value + 1.0, 2) + self.bits += math.log(self.max_value + 1.0, 2) else: # Opposite triangle distribution with minimum. self.bits += math.log( - max_value * (max_value + 1) / (abs(avg - prev_avg) + 1), 2) + (self.max_value * (self.max_value + 1)) + / (abs(self.avg - self.prev_avg) + 1), + 2, + ) if self.size < 2: return # Stdev is considered to be uniformly distributed # from zero to max_value. That is quite a bad expectation, # but resilient to negative samples etc. - self.bits += math.log(max_value + 1.0, 2) + self.bits += math.log(self.max_value + 1.0, 2) # Now we know the samples lie on sphere in size-1 dimensions. # So it is (size-2)-sphere, with radius^2 == stdev^2 * size. 
# https://en.wikipedia.org/wiki/N-sphere - sphere_area_ln = math.log(2) + math.log(math.pi) * ((size - 1) / 2.0) - sphere_area_ln -= math.lgamma((size - 1) / 2.0) - sphere_area_ln += math.log(stdev + 1.0) * (size - 2) - sphere_area_ln += math.log(size) * ((size - 2) / 2.0) + sphere_area_ln = math.log(2) + sphere_area_ln += math.log(math.pi) * ((self.size - 1) / 2.0) + sphere_area_ln -= math.lgamma((self.size - 1) / 2.0) + sphere_area_ln += math.log(self.stdev + 1.0) * (self.size - 2) + sphere_area_ln += math.log(self.size) * ((self.size - 2) / 2.0) self.bits += sphere_area_ln / math.log(2) - def __str__(self): - """Return string with human readable description of the group. - - :returns: Readable description. - :rtype: str - """ - return ( - f"size={self.size} avg={self.avg} stdev={self.stdev}" - f" bits={self.bits}" - ) - - def __repr__(self): - """Return string executable as Python constructor call. - - :returns: Executable constructor call. - :rtype: str - """ - return ( - f"BitCountingStats(size={self.size!r},avg={self.avg!r}" - f",stdev={self.stdev!r},max_value={self.max_value!r}" - f",prev_avg={self.prev_avg!r})" - ) - + # TODO: Rename, so pylint stops complaining about signature change. @classmethod - def for_runs(cls, runs, max_value=None, prev_avg=None): + def for_runs( + cls, + runs: typing.Iterable[typing.Union[float, AvgStdevStats]], + max_value: float, + prev_avg: typing.Optional[float] = None, + ): """Return new stats instance describing the sequence of runs. 
If you want to append data to existing stats object, @@ -164,6 +139,11 @@ class BitCountingStats(AvgStdevStats): :rtype: cls """ asd = AvgStdevStats.for_runs(runs) - ret_obj = cls(size=asd.size, avg=asd.avg, stdev=asd.stdev, - max_value=max_value, prev_avg=prev_avg) + ret_obj = cls( + size=asd.size, + avg=asd.avg, + stdev=asd.stdev, + max_value=max_value, + prev_avg=prev_avg, + ) return ret_obj diff --git a/resources/libraries/python/jumpavg/classify.py b/resources/libraries/python/jumpavg/classify.py index 252c71e8d5..87d2502037 100644 --- a/resources/libraries/python/jumpavg/classify.py +++ b/resources/libraries/python/jumpavg/classify.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 Cisco and/or its affiliates. +# Copyright (c) 2022 Cisco and/or its affiliates. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: @@ -20,15 +20,20 @@ for grouping results into the list of groups, assuming each group is a population of different Gaussian distribution. """ +import typing + from .AvgStdevStats import AvgStdevStats from .BitCountingGroupList import BitCountingGroupList -def classify(values): +def classify( + values: typing.Iterable[typing.Union[float, typing.Iterable[float]]] +) -> BitCountingGroupList: """Return the values in groups of optimal bit count. Here, a value is either a float, or an iterable of floats. Such iterables represent an undivisible sequence of floats. + Int is accepted anywhere instead of float. Internally, such sequence is replaced by AvgStdevStats after maximal value is found. 
@@ -50,27 +55,24 @@ def classify(values): if subvalue > max_value: max_value = subvalue processed_values.append(AvgStdevStats.for_runs(value)) - open_at = list() - closed_before = [BitCountingGroupList(max_value=max_value)] - for index, value in enumerate(processed_values): - newly_open = closed_before[index].copy() - newly_open.append_group_of_runs([value]) - open_at.append(newly_open) - record_group_list = newly_open - for previous_index, old_open in enumerate(open_at[:index]): - new_open = old_open.copy().append_run_to_to_last_group(value) - open_at[previous_index] = new_open - if new_open.bits < record_group_list.bits: - record_group_list = new_open - closed_before.append(record_group_list) - partition = closed_before[-1] - previous_average = partition[0].stats.avg - for group in partition: + # Glist means group list (BitCountingGroupList). + open_glists = list() + record_glist = BitCountingGroupList(max_value=max_value) + for value in processed_values: + new_open_glist = record_glist.copy_fast().append_group_of_runs([value]) + record_glist = new_open_glist + for old_open_glist in open_glists: + old_open_glist.append_run_to_to_last_group(value) + if old_open_glist.bits < record_glist.bits: + record_glist = old_open_glist + open_glists.append(new_open_glist) + previous_average = record_glist[0].stats.avg + for group in record_glist: if group.stats.avg == previous_average: - group.comment = u"normal" + group.comment = "normal" elif group.stats.avg < previous_average: - group.comment = u"regression" + group.comment = "regression" elif group.stats.avg > previous_average: - group.comment = u"progression" + group.comment = "progression" previous_average = group.stats.avg - return partition + return record_glist |