diff options
author | Vratko Polak <vrpolak@cisco.com> | 2022-08-09 14:56:15 +0200 |
---|---|---|
committer | Tibor Frank <tifrank@cisco.com> | 2022-08-15 10:58:57 +0000 |
commit | 4bfbd4d72ad53eb1694868c19640c8b4a17d32cb (patch) | |
tree | 0a566caa3a9ce141f8045bf22c395833355f3a7c /resources/libraries/python/jumpavg/BitCountingGroup.py | |
parent | c1b770bc71eda83468c0e2a97c851b831b76641b (diff) |
feat(jumpavg): speed up, use Python 3.8 features
+ The main speedup comes from abandoning copy.deepcopy(),
doing shallow list copies (at most) and introducing copy_fast().
+ Turn into dataclasses whenever possible, use type hints.
+ Simplify the partition search code.
+ It is now clearer that the time complexity is O(N*(N+n)),
where N is number of samples, and n is the average size
of the last group of the current record glist.
+ Used black for formatting, so no u"" anymore.
+ Update metadata for 0.3.0 release.
Change-Id: I302203b4d42aeb22be1128e2fe72353a44eae5d0
Signed-off-by: Vratko Polak <vrpolak@cisco.com>
Diffstat (limited to 'resources/libraries/python/jumpavg/BitCountingGroup.py')
-rw-r--r-- | resources/libraries/python/jumpavg/BitCountingGroup.py | 134 |
1 files changed, 59 insertions, 75 deletions
diff --git a/resources/libraries/python/jumpavg/BitCountingGroup.py b/resources/libraries/python/jumpavg/BitCountingGroup.py index f1bdc502fd..48bea086f4 100644 --- a/resources/libraries/python/jumpavg/BitCountingGroup.py +++ b/resources/libraries/python/jumpavg/BitCountingGroup.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 Cisco and/or its affiliates. +# Copyright (c) 2022 Cisco and/or its affiliates. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: @@ -13,14 +13,16 @@ """Module holding BitCountingGroup class.""" -import copy +import collections +import dataclasses +import typing from .AvgStdevStats import AvgStdevStats from .BitCountingStats import BitCountingStats -class BitCountingGroup: - # TODO: Inherit from collections.abc.Sequence in Python 3. +@dataclasses.dataclass +class BitCountingGroup(collections.abc.Sequence): """Group of runs which tracks bit count in an efficient manner. This class contains methods that mutate the internal state, @@ -38,74 +40,58 @@ class BitCountingGroup: a method to add a single run in an efficient manner is provided. """ - def __init__(self, run_list=None, stats=None, bits=None, - max_value=None, prev_avg=None, comment="unknown"): - """Set the internal state and partially the stats. - - A "group" stands for an Iterable of runs, where "run" is either - a float value, or a stats-like object (only size, avg and stdev - are accessed). Run is a hypothetical abstract class, - defining it in Python 2 is too much hassle. - - Only a copy of the run list argument value is stored in the instance, - so it is not a problem if the value object is mutated afterwards. + run_list: typing.List[typing.Union[float, AvgStdevStats]] + """List of run to compose into this group. 
+ The init call takes ownership of the list, + so the caller should clone it to avoid unexpected muations.""" + max_value: float + """Maximal sample value to expect.""" + comment: str = "unknown" + """Any string giving more info, e.g. "regression".""" + prev_avg: typing.Optional[float] = None + """Average of the previous group, if any.""" + stats: AvgStdevStats = None + """Stats object used for computing bits. + Almost always recomputed, except when non-None in init.""" + cached_bits: typing.Optional[float] = None + """Cached value of information content. + Noned on edit, recomputed if needed and None.""" + + def __post_init__(self): + """Recompute stats is None. It is not verified whether the user provided values are valid, e.g. whether the stats and bits values reflect the runs. - - :param run_list: List of run to compose into this group. Default: empty. - :param stats: Stats object used for computing bits. - :param bits: Cached value of information content. - :param max_value: Maximal sample value to be used for computing. - :param prev_avg: Average of the previous group, affects bits. - :param comment: Any string giving more info, e.g. "regression". - :type run_list: Iterable[Run] - :type stats: Optional[AvgStdevStats] - :type bits: Optional[float] - :type max_value: float - :type prev_avg: Optional[float] - :type comment: str """ - self.run_list = copy.deepcopy(run_list) if run_list else list() - self.stats = stats - self.cached_bits = bits - self.max_value = max_value - self.prev_avg = prev_avg - self.comment = comment if self.stats is None: self.stats = AvgStdevStats.for_runs(self.run_list) - def __str__(self): - """Return string with human readable description of the group. - - :returns: Readable description. - :rtype: str - """ - return f"stats={self.stats} bits={self.cached_bits}" + @property + def bits(self) -> float: + """Return overall bit content of the group list. - def __repr__(self): - """Return string executable as Python constructor call. 
+ If not cached, compute from stats and cache. - :returns: Executable constructor call. - :rtype: str + :returns: The overall information content in bits. + :rtype: float """ - return ( - f"BitCountingGroup(run_list={self.run_list!r},stats={self.stats!r}" - f",bits={self.cached_bits!r},max_value={self.max_value!r}" - f",prev_avg={self.prev_avg!r},comment={self.comment!r})" - ) + if self.cached_bits is None: + self.cached_bits = BitCountingStats.for_runs( + [self.stats], self.max_value, self.prev_avg + ).bits + return self.cached_bits - def __getitem__(self, index): + def __getitem__(self, index: int) -> typing.Union[float, AvgStdevStats]: """Return the run at the index. :param index: Index of the run to return. :type index: int :returns: The run at the index. - :rtype: Run + :rtype: typing.Union[float, AvgStdevStats] """ return self.run_list[index] - def __len__(self): + def __len__(self) -> int: """Return the number of runs in the group. :returns: The Length of run_list. @@ -113,39 +99,35 @@ class BitCountingGroup: """ return len(self.run_list) - def copy(self): + def copy(self) -> "BitCountingGroup": """Return a new instance with copied internal state. + Stats are preserved to avoid re-computation. + As both float and AvgStdevStats are effectively immutable, + only a shallow copy of the runs list is performed. + :returns: The copied instance. :rtype: BitCountingGroup """ stats = AvgStdevStats.for_runs([self.stats]) return self.__class__( - run_list=self.run_list, stats=stats, bits=self.cached_bits, - max_value=self.max_value, prev_avg=self.prev_avg, - comment=self.comment) - - @property - def bits(self): - """Return overall bit content of the group list. - - If not cached, compute from stats and cache. - - :returns: The overall information content in bits. 
- :rtype: float - """ - if self.cached_bits is None: - self.cached_bits = BitCountingStats.for_runs( - [self.stats], self.max_value, self.prev_avg).bits - return self.cached_bits + run_list=list(self.run_list), + stats=stats, + cached_bits=self.cached_bits, + max_value=self.max_value, + prev_avg=self.prev_avg, + comment=self.comment, + ) - def append(self, run): + def append( + self, run: typing.Union[float, AvgStdevStats] + ) -> "BitCountingGroup": """Mutate to add the new run, return self. Stats are updated, but old bits value is deleted from cache. :param run: The run value to add to the group. - :type value: Run + :type value: typing.Union[float, AvgStdevStats] :returns: The updated self. :rtype: BitCountingGroup """ @@ -154,7 +136,9 @@ class BitCountingGroup: self.cached_bits = None return self - def extend(self, runs): + def extend( + self, runs: typing.Iterable[typing.Union[float, AvgStdevStats]] + ) -> "BitCountingGroup": """Mutate to add the new runs, return self. This is saves small amount of computation @@ -163,7 +147,7 @@ class BitCountingGroup: Stats are updated, but old bits value is deleted from cache. :param runs: The runs to add to the group. - :type value: Iterable[Run] + :type value: typing.Iterable[typing.Union[float, AvgStdevStats]] :returns: The updated self. :rtype: BitCountingGroup """ |