aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVratko Polak <vrpolak@cisco.com>2022-08-09 14:56:15 +0200
committerTibor Frank <tifrank@cisco.com>2022-08-15 10:58:57 +0000
commit4bfbd4d72ad53eb1694868c19640c8b4a17d32cb (patch)
tree0a566caa3a9ce141f8045bf22c395833355f3a7c
parentc1b770bc71eda83468c0e2a97c851b831b76641b (diff)
feat(jumpavg): speed up, use Python 3.8 features
+ The main speedup comes from abandoning copy.deepcopy(), doing shallow list copies (at most) and introducing copy_fast(). + Turn into dataclasses whenever possible, use type hints. + Simplify the partition search code, + It is now clearer the time complexity is O(N*(N+n)), where N is number of samples, and n is the average size of the last group of the current record glist. + Used black for formatting, so no u"" anymore. + Update metadata for 0.3.0 release. Change-Id: I302203b4d42aeb22be1128e2fe72353a44eae5d0 Signed-off-by: Vratko Polak <vrpolak@cisco.com>
-rw-r--r--PyPI/jumpavg/README.rst5
-rw-r--r--PyPI/jumpavg/setup.py6
-rw-r--r--resources/libraries/python/jumpavg/AvgStdevStats.py56
-rw-r--r--resources/libraries/python/jumpavg/BitCountingGroup.py134
-rw-r--r--resources/libraries/python/jumpavg/BitCountingGroupList.py128
-rw-r--r--resources/libraries/python/jumpavg/BitCountingStats.py110
-rw-r--r--resources/libraries/python/jumpavg/classify.py46
7 files changed, 221 insertions, 264 deletions
diff --git a/PyPI/jumpavg/README.rst b/PyPI/jumpavg/README.rst
index 3161988f69..f66ea9d39b 100644
--- a/PyPI/jumpavg/README.rst
+++ b/PyPI/jumpavg/README.rst
@@ -25,6 +25,11 @@ Change log
TODO: Move into separate file?
+0.3.0: Considerable speedup by avoiding unneeded copy. Dataclasses used.
+ Mostly API compatible, but repr looks different.
+
+0.2.0: API incompatible changes. Targeted to Python 3 now.
+
0.1.3: Changed stdev computation to avoid negative variance due to rounding errors.
0.1.2: First version published in PyPI.
diff --git a/PyPI/jumpavg/setup.py b/PyPI/jumpavg/setup.py
index 28ddfcf0b1..129277a26e 100644
--- a/PyPI/jumpavg/setup.py
+++ b/PyPI/jumpavg/setup.py
@@ -16,7 +16,7 @@ with open(path.join(here, u"README.rst"), encoding=u"utf-8") as f:
setup(
name=u"jumpavg",
- version=u"0.2.0", # This is currently the only place listing the version.
+ version=u"0.3.0", # This is currently the only place listing the version.
description=(
u"Library for locating changes in time series by grouping results."
),
@@ -36,12 +36,12 @@ setup(
u"License :: OSI Approved :: Apache Software License",
u"Natural Language :: English",
# TODO: Test which Python versions is the code compatible with.
- u"Programming Language :: Python :: 2.7",
+ u"Programming Language :: Python :: 3.8",
u"Topic :: Scientific/Engineering :: Information Analysis"
],
keywords=u"progression regression anomaly detection statistics bits",
packages=find_packages(exclude=[]),
- python_requires="~=3.6",
+ python_requires="~=3.8",
install_requires=[],
# TODO: Include simulator and tests.
extras_require={
diff --git a/resources/libraries/python/jumpavg/AvgStdevStats.py b/resources/libraries/python/jumpavg/AvgStdevStats.py
index 4720c10f3d..d40b316bf1 100644
--- a/resources/libraries/python/jumpavg/AvgStdevStats.py
+++ b/resources/libraries/python/jumpavg/AvgStdevStats.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021 Cisco and/or its affiliates.
+# Copyright (c) 2022 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
@@ -13,9 +13,12 @@
"""Module holding AvgStdevStats class."""
+import dataclasses
import math
+import typing
+@dataclasses.dataclass
class AvgStdevStats:
"""Class for statistics which include average and stdev of a group.
@@ -25,45 +28,18 @@ class AvgStdevStats:
Instances are only statistics, the data itself is stored elsewhere.
"""
- def __init__(self, size=0, avg=0.0, stdev=0.0):
- """Construct the stats object by storing the values needed.
-
- Each value has to be numeric.
- The values are not sanitized depending on size, wrong initialization
- can cause delayed math errors.
-
- :param size: Number of values participating in this group.
- :param avg: Population average of the participating sample values.
- :param stdev: Population standard deviation of the sample values.
- :type size: int
- :type avg: float
- :type stdev: float
- """
- self.size = size
- self.avg = avg
- self.stdev = stdev
-
- def __str__(self):
- """Return string with human readable description of the group.
-
- :returns: Readable description.
- :rtype: str
- """
- return f"size={self.size} avg={self.avg} stdev={self.stdev}"
-
- def __repr__(self):
- """Return string executable as Python constructor call.
-
- :returns: Executable constructor call.
- :rtype: str
- """
- return (
- f"AvgStdevStats(size={self.size!r},avg={self.avg!r}"
- f",stdev={self.stdev!r})"
- )
+ size: int = 0
+ """Number of scalar values (samples) participating in this group."""
+ avg: float = 0.0
+ """Population average of the participating sample values."""
+ stdev: float = 0.0
+ """Population standard deviation of the sample values."""
@classmethod
- def for_runs(cls, runs):
+ def for_runs(
+ cls,
+ runs: typing.Iterable[typing.Union[float, "AvgStdevStats"]],
+ ) -> "AvgStdevStats":
"""Return new stats instance describing the sequence of runs.
If you want to append data to existing stats object,
@@ -72,8 +48,8 @@ class AvgStdevStats:
Instead of a verb, "for" is used to start this method name,
to signify the result contains less information than the input data.
- Here, Run is a hypothetical abstract class, an union of float and cls.
- Defining that as a real abstract class in Python 2 is too much hassle.
+ Here, run is a hypothetical abstract class, an union of float and cls.
+ Defining that as a real abstract class in Python is too much hassle.
:param runs: Sequence of data to describe by the new metadata.
:type runs: Iterable[Union[float, cls]]
diff --git a/resources/libraries/python/jumpavg/BitCountingGroup.py b/resources/libraries/python/jumpavg/BitCountingGroup.py
index f1bdc502fd..48bea086f4 100644
--- a/resources/libraries/python/jumpavg/BitCountingGroup.py
+++ b/resources/libraries/python/jumpavg/BitCountingGroup.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021 Cisco and/or its affiliates.
+# Copyright (c) 2022 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
@@ -13,14 +13,16 @@
"""Module holding BitCountingGroup class."""
-import copy
+import collections
+import dataclasses
+import typing
from .AvgStdevStats import AvgStdevStats
from .BitCountingStats import BitCountingStats
-class BitCountingGroup:
- # TODO: Inherit from collections.abc.Sequence in Python 3.
+@dataclasses.dataclass
+class BitCountingGroup(collections.abc.Sequence):
"""Group of runs which tracks bit count in an efficient manner.
This class contains methods that mutate the internal state,
@@ -38,74 +40,58 @@ class BitCountingGroup:
a method to add a single run in an efficient manner is provided.
"""
- def __init__(self, run_list=None, stats=None, bits=None,
- max_value=None, prev_avg=None, comment="unknown"):
- """Set the internal state and partially the stats.
-
- A "group" stands for an Iterable of runs, where "run" is either
- a float value, or a stats-like object (only size, avg and stdev
- are accessed). Run is a hypothetical abstract class,
- defining it in Python 2 is too much hassle.
-
- Only a copy of the run list argument value is stored in the instance,
- so it is not a problem if the value object is mutated afterwards.
+ run_list: typing.List[typing.Union[float, AvgStdevStats]]
+ """List of run to compose into this group.
+ The init call takes ownership of the list,
+ so the caller should clone it to avoid unexpected mutations."""
+ max_value: float
+ """Maximal sample value to expect."""
+ comment: str = "unknown"
+ """Any string giving more info, e.g. "regression"."""
+ prev_avg: typing.Optional[float] = None
+ """Average of the previous group, if any."""
+ stats: AvgStdevStats = None
+ """Stats object used for computing bits.
+ Almost always recomputed, except when non-None in init."""
+ cached_bits: typing.Optional[float] = None
+ """Cached value of information content.
+ Noned on edit, recomputed if needed and None."""
+
+ def __post_init__(self):
+ """Recompute stats if None.
It is not verified whether the user provided values are valid,
e.g. whether the stats and bits values reflect the runs.
-
- :param run_list: List of run to compose into this group. Default: empty.
- :param stats: Stats object used for computing bits.
- :param bits: Cached value of information content.
- :param max_value: Maximal sample value to be used for computing.
- :param prev_avg: Average of the previous group, affects bits.
- :param comment: Any string giving more info, e.g. "regression".
- :type run_list: Iterable[Run]
- :type stats: Optional[AvgStdevStats]
- :type bits: Optional[float]
- :type max_value: float
- :type prev_avg: Optional[float]
- :type comment: str
"""
- self.run_list = copy.deepcopy(run_list) if run_list else list()
- self.stats = stats
- self.cached_bits = bits
- self.max_value = max_value
- self.prev_avg = prev_avg
- self.comment = comment
if self.stats is None:
self.stats = AvgStdevStats.for_runs(self.run_list)
- def __str__(self):
- """Return string with human readable description of the group.
-
- :returns: Readable description.
- :rtype: str
- """
- return f"stats={self.stats} bits={self.cached_bits}"
+ @property
+ def bits(self) -> float:
+ """Return overall bit content of the group list.
- def __repr__(self):
- """Return string executable as Python constructor call.
+ If not cached, compute from stats and cache.
- :returns: Executable constructor call.
- :rtype: str
+ :returns: The overall information content in bits.
+ :rtype: float
"""
- return (
- f"BitCountingGroup(run_list={self.run_list!r},stats={self.stats!r}"
- f",bits={self.cached_bits!r},max_value={self.max_value!r}"
- f",prev_avg={self.prev_avg!r},comment={self.comment!r})"
- )
+ if self.cached_bits is None:
+ self.cached_bits = BitCountingStats.for_runs(
+ [self.stats], self.max_value, self.prev_avg
+ ).bits
+ return self.cached_bits
- def __getitem__(self, index):
+ def __getitem__(self, index: int) -> typing.Union[float, AvgStdevStats]:
"""Return the run at the index.
:param index: Index of the run to return.
:type index: int
:returns: The run at the index.
- :rtype: Run
+ :rtype: typing.Union[float, AvgStdevStats]
"""
return self.run_list[index]
- def __len__(self):
+ def __len__(self) -> int:
"""Return the number of runs in the group.
:returns: The Length of run_list.
@@ -113,39 +99,35 @@ class BitCountingGroup:
"""
return len(self.run_list)
- def copy(self):
+ def copy(self) -> "BitCountingGroup":
"""Return a new instance with copied internal state.
+ Stats are preserved to avoid re-computation.
+ As both float and AvgStdevStats are effectively immutable,
+ only a shallow copy of the runs list is performed.
+
:returns: The copied instance.
:rtype: BitCountingGroup
"""
stats = AvgStdevStats.for_runs([self.stats])
return self.__class__(
- run_list=self.run_list, stats=stats, bits=self.cached_bits,
- max_value=self.max_value, prev_avg=self.prev_avg,
- comment=self.comment)
-
- @property
- def bits(self):
- """Return overall bit content of the group list.
-
- If not cached, compute from stats and cache.
-
- :returns: The overall information content in bits.
- :rtype: float
- """
- if self.cached_bits is None:
- self.cached_bits = BitCountingStats.for_runs(
- [self.stats], self.max_value, self.prev_avg).bits
- return self.cached_bits
+ run_list=list(self.run_list),
+ stats=stats,
+ cached_bits=self.cached_bits,
+ max_value=self.max_value,
+ prev_avg=self.prev_avg,
+ comment=self.comment,
+ )
- def append(self, run):
+ def append(
+ self, run: typing.Union[float, AvgStdevStats]
+ ) -> "BitCountingGroup":
"""Mutate to add the new run, return self.
Stats are updated, but old bits value is deleted from cache.
:param run: The run value to add to the group.
- :type value: Run
+ :type value: typing.Union[float, AvgStdevStats]
:returns: The updated self.
:rtype: BitCountingGroup
"""
@@ -154,7 +136,9 @@ class BitCountingGroup:
self.cached_bits = None
return self
- def extend(self, runs):
+ def extend(
+ self, runs: typing.Iterable[typing.Union[float, AvgStdevStats]]
+ ) -> "BitCountingGroup":
"""Mutate to add the new runs, return self.
This is saves small amount of computation
@@ -163,7 +147,7 @@ class BitCountingGroup:
Stats are updated, but old bits value is deleted from cache.
:param runs: The runs to add to the group.
- :type value: Iterable[Run]
+ :type value: typing.Iterable[typing.Union[float, AvgStdevStats]]
:returns: The updated self.
:rtype: BitCountingGroup
"""
diff --git a/resources/libraries/python/jumpavg/BitCountingGroupList.py b/resources/libraries/python/jumpavg/BitCountingGroupList.py
index 6a1c86baf2..468e79b236 100644
--- a/resources/libraries/python/jumpavg/BitCountingGroupList.py
+++ b/resources/libraries/python/jumpavg/BitCountingGroupList.py
@@ -13,13 +13,16 @@
"""Module holding BitCountingGroupList class."""
-import copy
+import collections
+import dataclasses
+import typing
+from .AvgStdevStats import AvgStdevStats # Just for type hints.
from .BitCountingGroup import BitCountingGroup
-class BitCountingGroupList:
- # TODO: Inherit from collections.abc.Sequence in Python 3.
+@dataclasses.dataclass
+class BitCountingGroupList(collections.abc.Sequence):
"""List of data groups which tracks overall bit count.
The Sequence-like access is related to the list of groups,
@@ -41,55 +44,27 @@ class BitCountingGroupList:
recalculations if the bit count is not needed.
"""
- def __init__(self, group_list=None, bits_except_last=0.0, max_value=None):
- """Set the internal state without any calculations.
-
- The group list argument is copied deeply, so it is not a problem
- if the value object is mutated afterwards.
+ max_value: float
+ """Maximal sample value to base bits computation on."""
+ group_list: typing.List[BitCountingGroup] = None
+ """List of groups to compose this group list.
+ Init also accepts None standing for an empty list.
+ This class takes ownership of the list,
+ so caller of init should clone their copy to avoid unexpected mutations.
+ """
+ bits_except_last: float = 0.0
+ """Partial sum of all but one group bits."""
- A "group" stands for an Iterable of runs, where "run" is either
- a float value, or a stats-like object (only size, avg and stdev
- are accessed). Run is a hypothetical abstract class,
- defining it in Python 2 is too much hassle.
+ def __post_init__(self):
+ """Turn possible None into an empty list.
It is not verified whether the user provided values are valid,
- e.g. whether the cached bits values make sense.
-
- The max_value is required and immutable,
- it is recommended the callers find their maximum beforehand.
-
- :param group_list: List of groups to compose this group list (or empty).
- :param bits_except_last: Partial sum of all but one group bits.
- :param max_value: Maximal sample value to base bits computation on.
- :type group_list: Iterable[BitCountingGroup]
- :type bits_except_last: float
- :type max_value: float
- """
- self.group_list = copy.deepcopy(group_list) if group_list else list()
- self.bits_except_last = bits_except_last
- self.max_value = max_value
-
- def __str__(self):
- """Return string with human readable description of the group list.
-
- :returns: Readable description.
- :rtype: str
+ e.g. whether the cached bits values (and bits_except_last) make sense.
"""
- return f"group_list={self.group_list} bits={self.bits}"
-
- def __repr__(self):
- """Return string executable as Python constructor call.
+ if self.group_list is None:
+ self.group_list = list()
- :returns: Executable constructor call.
- :rtype: str
- """
- return (
- f"BitCountingGroupList(group_list={self.group_list!r}"
- f",bits_except_last={self.bits_except_last!r}"
- f",max_value={self.max_value!r})"
- )
-
- def __getitem__(self, index):
+ def __getitem__(self, index: int) -> BitCountingGroup:
"""Return the group at the index.
:param index: Index of the group to return.
@@ -99,7 +74,7 @@ class BitCountingGroupList:
"""
return self.group_list[index]
- def __len__(self):
+ def __len__(self) -> int:
"""Return the length of the group list.
:returns: The Length of group_list.
@@ -107,19 +82,44 @@ class BitCountingGroupList:
"""
return len(self.group_list)
- def copy(self):
+ def copy(self) -> "BitCountingGroupList":
"""Return a new instance with copied internal state.
:returns: The copied instance.
:rtype: BitCountingGroupList
"""
return self.__class__(
- group_list=self.group_list, bits_except_last=self.bits_except_last,
- max_value=self.max_value
+ max_value=self.max_value,
+ group_list=[group.copy() for group in self.group_list],
+ bits_except_last=self.bits_except_last,
+ )
+
+ def copy_fast(self) -> "BitCountingGroupList":
+ """Return a new instance with minimally copied internal state.
+
+ The assumption here is that only the last group will ever be mutated
+ (in self, probably never in the return value),
+ so all the previous groups can be "copied by reference".
+
+ :returns: The copied instance.
+ :rtype: BitCountingGroupList
+ """
+ group_list = list(self.group_list)
+ if group_list:
+ group_list[-1] = group_list[-1].copy()
+ # Further speedup is possible by keeping the last group
+ # as a singly linked (from end) list,
+ # but for CSIT sample sizes, copy of whole Python list is faster.
+ # TODO: Implement linked list as an option
+ # for users with many samples.
+ return self.__class__(
+ max_value=self.max_value,
+ group_list=group_list,
+ bits_except_last=self.bits_except_last,
)
@property
- def bits(self):
+ def bits(self) -> float:
"""Return overall bit content of the group list.
:returns: The overall information content in bits.
@@ -130,12 +130,17 @@ class BitCountingGroupList:
# TODO: Is it worth to cache the overall result?
return self.bits_except_last + self.group_list[-1].bits
- def append_group_of_runs(self, runs):
+ def append_group_of_runs(
+ self,
+ runs: typing.Union[
+ BitCountingGroup, typing.List[typing.Union[float, AvgStdevStats]]
+ ],
+ ) -> "BitCountingGroupList":
"""Mutate to add a new group based on the runs, return self.
- The argument is copied before adding to the group list,
- so further edits do not affect the grup list.
- The argument can also be a group, only runs from it are used.
+ The list argument is NOT copied before adding to the group list,
+ so further edits MAY not affect the group list.
+ The list from BitCountingGroup is shallow copied though.
:param runs: Runs to form the next group to be appended to self.
:type runs: Union[Iterable[Run], BitCountingGroup]
@@ -151,12 +156,15 @@ class BitCountingGroupList:
new_group.cached_bits = None
else:
new_group = BitCountingGroup(
- run_list=runs, max_value=self.max_value, prev_avg=prev_avg)
+ run_list=runs, max_value=self.max_value, prev_avg=prev_avg
+ )
self.bits_except_last = self.bits
self.group_list.append(new_group)
return self
- def append_run_to_to_last_group(self, run):
+ def append_run_to_to_last_group(
+ self, run: typing.Union[float, AvgStdevStats]
+ ) -> "BitCountingGroupList":
"""Mutate to add new run at the end of the last group.
Basically a one-liner, only returning group list instead of last group.
@@ -170,7 +178,9 @@ class BitCountingGroupList:
self.group_list[-1].append(run)
return self
- def extend_runs_to_last_group(self, runs):
+ def extend_runs_to_last_group(
+ self, runs: typing.Iterable[typing.Union[float, AvgStdevStats]]
+ ) -> "BitCountingGroupList":
"""Mutate to add new runs to the end of the last group.
A faster alternative to appending runs one by one in a loop.
diff --git a/resources/libraries/python/jumpavg/BitCountingStats.py b/resources/libraries/python/jumpavg/BitCountingStats.py
index 7b5e659214..524ac952c8 100644
--- a/resources/libraries/python/jumpavg/BitCountingStats.py
+++ b/resources/libraries/python/jumpavg/BitCountingStats.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021 Cisco and/or its affiliates.
+# Copyright (c) 2022 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
@@ -13,11 +13,14 @@
"""Module holding BitCountingStats class."""
+import dataclasses
import math
+import typing
from .AvgStdevStats import AvgStdevStats
+@dataclasses.dataclass
class BitCountingStats(AvgStdevStats):
"""Class for statistics which include information content of a group.
@@ -33,11 +36,20 @@ class BitCountingStats(AvgStdevStats):
Only for_runs method calls the parent implementation, without using super().
"""
- def __init__(
- self, size=0, avg=None, stdev=0.0, max_value=None, prev_avg=None):
- """Construct the stats object by computing from the values needed.
+ max_value: float = None
+ """Maximal sample value (real or estimated).
+ Default value is there just for argument ordering reasons,
+ leaving None leads to exceptions."""
+ prev_avg: typing.Optional[float] = None
+ """Population average of the previous group (if any)."""
+ bits: float = None
+ """The computed information content of the group.
+ It is formally an argument to init function, just to keep repr string
+ a valid call. But the init value is ignored and always recomputed.
+ """
- The values are not sanitized, faulty callers can cause math errors.
+ def __post_init__(self):
+ """Construct the stats object by computing from the values needed.
The None values are allowed for stats for zero size data,
but such stats can report arbitrary avg and max_value.
@@ -54,91 +66,54 @@ class BitCountingStats(AvgStdevStats):
(but not with floating point mechanic).
The hope is the difference will have
no real impact on the classification procedure.
-
- :param size: Number of values participating in this group.
- :param avg: Population average of the participating sample values.
- :param stdev: Population standard deviation of the sample values.
- :param max_value: Maximal expected value.
- TODO: This might be more optimal,
- but max-invariant algorithm will be nicer.
- :param prev_avg: Population average of the previous group.
- If None, no previous average is taken into account.
- If not None, the given previous average is used to discourage
- consecutive groups with similar averages
- (opposite triangle distribution is assumed).
- :type avg: float
- :type size: int
- :type stdev: float
- :type max_value: Union[float, NoneType]
- :type prev_avg: Union[float, NoneType]
"""
- self.avg = avg
- self.size = size
- self.stdev = stdev
- self.max_value = max_value
- self.prev_avg = prev_avg
# Zero size should in principle have non-zero bits (coding zero size),
# but zero allows users to add empty groups without affecting bits.
self.bits = 0.0
if self.size < 1:
return
- if avg is None:
- raise ValueError(f"Avg is None: {self!r}")
- if max_value is None or max_value <= 0.0:
+ if self.max_value <= 0.0:
raise ValueError(f"Invalid max value: {self!r}")
# Length of the sequence must be also counted in bits,
# otherwise the message would not be decodable.
# Model: probability of k samples is 1/k - 1/(k+1) == 1/k/(k+1)
# This is compatible with zero size leading to zero bits.
- self.bits += math.log(size * (size + 1), 2)
- if prev_avg is None:
+ self.bits += math.log(self.size * (self.size + 1), 2)
+ if self.prev_avg is None:
# Avg is considered to be uniformly distributed
# from zero to max_value.
- self.bits += math.log(max_value + 1.0, 2)
+ self.bits += math.log(self.max_value + 1.0, 2)
else:
# Opposite triangle distribution with minimum.
self.bits += math.log(
- max_value * (max_value + 1) / (abs(avg - prev_avg) + 1), 2)
+ (self.max_value * (self.max_value + 1))
+ / (abs(self.avg - self.prev_avg) + 1),
+ 2,
+ )
if self.size < 2:
return
# Stdev is considered to be uniformly distributed
# from zero to max_value. That is quite a bad expectation,
# but resilient to negative samples etc.
- self.bits += math.log(max_value + 1.0, 2)
+ self.bits += math.log(self.max_value + 1.0, 2)
# Now we know the samples lie on sphere in size-1 dimensions.
# So it is (size-2)-sphere, with radius^2 == stdev^2 * size.
# https://en.wikipedia.org/wiki/N-sphere
- sphere_area_ln = math.log(2) + math.log(math.pi) * ((size - 1) / 2.0)
- sphere_area_ln -= math.lgamma((size - 1) / 2.0)
- sphere_area_ln += math.log(stdev + 1.0) * (size - 2)
- sphere_area_ln += math.log(size) * ((size - 2) / 2.0)
+ sphere_area_ln = math.log(2)
+ sphere_area_ln += math.log(math.pi) * ((self.size - 1) / 2.0)
+ sphere_area_ln -= math.lgamma((self.size - 1) / 2.0)
+ sphere_area_ln += math.log(self.stdev + 1.0) * (self.size - 2)
+ sphere_area_ln += math.log(self.size) * ((self.size - 2) / 2.0)
self.bits += sphere_area_ln / math.log(2)
- def __str__(self):
- """Return string with human readable description of the group.
-
- :returns: Readable description.
- :rtype: str
- """
- return (
- f"size={self.size} avg={self.avg} stdev={self.stdev}"
- f" bits={self.bits}"
- )
-
- def __repr__(self):
- """Return string executable as Python constructor call.
-
- :returns: Executable constructor call.
- :rtype: str
- """
- return (
- f"BitCountingStats(size={self.size!r},avg={self.avg!r}"
- f",stdev={self.stdev!r},max_value={self.max_value!r}"
- f",prev_avg={self.prev_avg!r})"
- )
-
+ # TODO: Rename, so pylint stops complaining about signature change.
@classmethod
- def for_runs(cls, runs, max_value=None, prev_avg=None):
+ def for_runs(
+ cls,
+ runs: typing.Iterable[typing.Union[float, AvgStdevStats]],
+ max_value: float,
+ prev_avg: typing.Optional[float] = None,
+ ):
"""Return new stats instance describing the sequence of runs.
If you want to append data to existing stats object,
@@ -164,6 +139,11 @@ class BitCountingStats(AvgStdevStats):
:rtype: cls
"""
asd = AvgStdevStats.for_runs(runs)
- ret_obj = cls(size=asd.size, avg=asd.avg, stdev=asd.stdev,
- max_value=max_value, prev_avg=prev_avg)
+ ret_obj = cls(
+ size=asd.size,
+ avg=asd.avg,
+ stdev=asd.stdev,
+ max_value=max_value,
+ prev_avg=prev_avg,
+ )
return ret_obj
diff --git a/resources/libraries/python/jumpavg/classify.py b/resources/libraries/python/jumpavg/classify.py
index 252c71e8d5..87d2502037 100644
--- a/resources/libraries/python/jumpavg/classify.py
+++ b/resources/libraries/python/jumpavg/classify.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021 Cisco and/or its affiliates.
+# Copyright (c) 2022 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
@@ -20,15 +20,20 @@ for grouping results into the list of groups,
assuming each group is a population of different Gaussian distribution.
"""
+import typing
+
from .AvgStdevStats import AvgStdevStats
from .BitCountingGroupList import BitCountingGroupList
-def classify(values):
+def classify(
+ values: typing.Iterable[typing.Union[float, typing.Iterable[float]]]
+) -> BitCountingGroupList:
"""Return the values in groups of optimal bit count.
Here, a value is either a float, or an iterable of floats.
Such iterables represent an undivisible sequence of floats.
+ Int is accepted anywhere instead of float.
Internally, such sequence is replaced by AvgStdevStats
after maximal value is found.
@@ -50,27 +55,24 @@ def classify(values):
if subvalue > max_value:
max_value = subvalue
processed_values.append(AvgStdevStats.for_runs(value))
- open_at = list()
- closed_before = [BitCountingGroupList(max_value=max_value)]
- for index, value in enumerate(processed_values):
- newly_open = closed_before[index].copy()
- newly_open.append_group_of_runs([value])
- open_at.append(newly_open)
- record_group_list = newly_open
- for previous_index, old_open in enumerate(open_at[:index]):
- new_open = old_open.copy().append_run_to_to_last_group(value)
- open_at[previous_index] = new_open
- if new_open.bits < record_group_list.bits:
- record_group_list = new_open
- closed_before.append(record_group_list)
- partition = closed_before[-1]
- previous_average = partition[0].stats.avg
- for group in partition:
+ # Glist means group list (BitCountingGroupList).
+ open_glists = list()
+ record_glist = BitCountingGroupList(max_value=max_value)
+ for value in processed_values:
+ new_open_glist = record_glist.copy_fast().append_group_of_runs([value])
+ record_glist = new_open_glist
+ for old_open_glist in open_glists:
+ old_open_glist.append_run_to_to_last_group(value)
+ if old_open_glist.bits < record_glist.bits:
+ record_glist = old_open_glist
+ open_glists.append(new_open_glist)
+ previous_average = record_glist[0].stats.avg
+ for group in record_glist:
if group.stats.avg == previous_average:
- group.comment = u"normal"
+ group.comment = "normal"
elif group.stats.avg < previous_average:
- group.comment = u"regression"
+ group.comment = "regression"
elif group.stats.avg > previous_average:
- group.comment = u"progression"
+ group.comment = "progression"
previous_average = group.stats.avg
- return partition
+ return record_glist