aboutsummaryrefslogtreecommitdiffstats
path: root/resources/libraries/python/jumpavg
diff options
context:
space:
mode:
Diffstat (limited to 'resources/libraries/python/jumpavg')
-rw-r--r--resources/libraries/python/jumpavg/__init__.py10
-rw-r--r--resources/libraries/python/jumpavg/avg_stdev_stats.py (renamed from resources/libraries/python/jumpavg/AvgStdevStats.py)2
-rw-r--r--resources/libraries/python/jumpavg/bit_counting_group.py (renamed from resources/libraries/python/jumpavg/BitCountingGroup.py)20
-rw-r--r--resources/libraries/python/jumpavg/bit_counting_group_list.py (renamed from resources/libraries/python/jumpavg/BitCountingGroupList.py)18
-rw-r--r--resources/libraries/python/jumpavg/bit_counting_stats.py (renamed from resources/libraries/python/jumpavg/BitCountingStats.py)38
-rw-r--r--resources/libraries/python/jumpavg/classify.py44
6 files changed, 87 insertions, 45 deletions
diff --git a/resources/libraries/python/jumpavg/__init__.py b/resources/libraries/python/jumpavg/__init__.py
index 4fa696c538..7f63b5ee39 100644
--- a/resources/libraries/python/jumpavg/__init__.py
+++ b/resources/libraries/python/jumpavg/__init__.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021 Cisco and/or its affiliates.
+# Copyright (c) 2023 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
@@ -15,8 +15,8 @@
__init__ file for "jumpavg" Python package.
"""
-from .AvgStdevStats import AvgStdevStats
-from .BitCountingStats import BitCountingStats
-from .BitCountingGroup import BitCountingGroup
-from .BitCountingGroupList import BitCountingGroupList
+from .avg_stdev_stats import AvgStdevStats
+from .bit_counting_stats import BitCountingStats
+from .bit_counting_group import BitCountingGroup
+from .bit_counting_group_list import BitCountingGroupList
from .classify import classify
diff --git a/resources/libraries/python/jumpavg/AvgStdevStats.py b/resources/libraries/python/jumpavg/avg_stdev_stats.py
index d40b316bf1..3d6a834919 100644
--- a/resources/libraries/python/jumpavg/AvgStdevStats.py
+++ b/resources/libraries/python/jumpavg/avg_stdev_stats.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022 Cisco and/or its affiliates.
+# Copyright (c) 2023 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
diff --git a/resources/libraries/python/jumpavg/BitCountingGroup.py b/resources/libraries/python/jumpavg/bit_counting_group.py
index 48bea086f4..22c9337532 100644
--- a/resources/libraries/python/jumpavg/BitCountingGroup.py
+++ b/resources/libraries/python/jumpavg/bit_counting_group.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022 Cisco and/or its affiliates.
+# Copyright (c) 2023 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
@@ -17,8 +17,8 @@ import collections
import dataclasses
import typing
-from .AvgStdevStats import AvgStdevStats
-from .BitCountingStats import BitCountingStats
+from .avg_stdev_stats import AvgStdevStats
+from .bit_counting_stats import BitCountingStats
@dataclasses.dataclass
@@ -46,7 +46,9 @@ class BitCountingGroup(collections.abc.Sequence):
so the caller should clone it to avoid unexpected mutations."""
max_value: float
"""Maximal sample value to expect."""
- comment: str = "unknown"
+ unit: float = 1.0
+ """Typical resolution of the values."""
+ comment: str = "normal"
"""Any string giving more info, e.g. "regression"."""
prev_avg: typing.Optional[float] = None
"""Average of the previous group, if any."""
@@ -64,7 +66,7 @@ class BitCountingGroup(collections.abc.Sequence):
e.g. whether the stats and bits values reflect the runs.
"""
if self.stats is None:
- self.stats = AvgStdevStats.for_runs(self.run_list)
+ self.stats = AvgStdevStats.for_runs(runs=self.run_list)
@property
def bits(self) -> float:
@@ -76,8 +78,11 @@ class BitCountingGroup(collections.abc.Sequence):
:rtype: float
"""
if self.cached_bits is None:
- self.cached_bits = BitCountingStats.for_runs(
- [self.stats], self.max_value, self.prev_avg
+ self.cached_bits = BitCountingStats.for_runs_and_params(
+ runs=[self.stats],
+ max_value=self.max_value,
+ unit=self.unit,
+ prev_avg=self.prev_avg,
).bits
return self.cached_bits
@@ -115,6 +120,7 @@ class BitCountingGroup(collections.abc.Sequence):
stats=stats,
cached_bits=self.cached_bits,
max_value=self.max_value,
+ unit=self.unit,
prev_avg=self.prev_avg,
comment=self.comment,
)
diff --git a/resources/libraries/python/jumpavg/BitCountingGroupList.py b/resources/libraries/python/jumpavg/bit_counting_group_list.py
index 468e79b236..e4d33b53a2 100644
--- a/resources/libraries/python/jumpavg/BitCountingGroupList.py
+++ b/resources/libraries/python/jumpavg/bit_counting_group_list.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022 Cisco and/or its affiliates.
+# Copyright (c) 2023 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
@@ -17,8 +17,8 @@ import collections
import dataclasses
import typing
-from .AvgStdevStats import AvgStdevStats # Just for type hints.
-from .BitCountingGroup import BitCountingGroup
+from .avg_stdev_stats import AvgStdevStats # Just for type hints.
+from .bit_counting_group import BitCountingGroup
@dataclasses.dataclass
@@ -46,6 +46,8 @@ class BitCountingGroupList(collections.abc.Sequence):
max_value: float
"""Maximal sample value to base bits computation on."""
+ unit: float = 1.0
+ """Typical resolution of the values."""
group_list: typing.List[BitCountingGroup] = None
"""List of groups to compose this group list.
Init also accepts None standing for an empty list.
@@ -62,7 +64,7 @@ class BitCountingGroupList(collections.abc.Sequence):
e.g. whether the cached bits values (and bits_except_last) make sense.
"""
if self.group_list is None:
- self.group_list = list()
+ self.group_list = []
def __getitem__(self, index: int) -> BitCountingGroup:
"""Return the group at the index.
@@ -90,6 +92,7 @@ class BitCountingGroupList(collections.abc.Sequence):
"""
return self.__class__(
max_value=self.max_value,
+ unit=self.unit,
group_list=[group.copy() for group in self.group_list],
bits_except_last=self.bits_except_last,
)
@@ -114,6 +117,7 @@ class BitCountingGroupList(collections.abc.Sequence):
# for users with many samples.
return self.__class__(
max_value=self.max_value,
+ unit=self.unit,
group_list=group_list,
bits_except_last=self.bits_except_last,
)
@@ -152,11 +156,15 @@ class BitCountingGroupList(collections.abc.Sequence):
# It is faster to avoid stats recalculation.
new_group = runs.copy()
new_group.max_value = self.max_value
+ # Unit is common.
new_group.prev_avg = prev_avg
new_group.cached_bits = None
else:
new_group = BitCountingGroup(
- run_list=runs, max_value=self.max_value, prev_avg=prev_avg
+ run_list=runs,
+ max_value=self.max_value,
+ unit=self.unit,
+ prev_avg=prev_avg,
)
self.bits_except_last = self.bits
self.group_list.append(new_group)
diff --git a/resources/libraries/python/jumpavg/BitCountingStats.py b/resources/libraries/python/jumpavg/bit_counting_stats.py
index 524ac952c8..caece2c8ca 100644
--- a/resources/libraries/python/jumpavg/BitCountingStats.py
+++ b/resources/libraries/python/jumpavg/bit_counting_stats.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022 Cisco and/or its affiliates.
+# Copyright (c) 2023 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
@@ -17,7 +17,7 @@ import dataclasses
import math
import typing
-from .AvgStdevStats import AvgStdevStats
+from .avg_stdev_stats import AvgStdevStats
@dataclasses.dataclass
@@ -40,6 +40,8 @@ class BitCountingStats(AvgStdevStats):
"""Maximal sample value (real or estimated).
Default value is there just for argument ordering reasons,
leaving None leads to exceptions."""
+ unit: float = 1.0
+ """Typical resolution of the values."""
prev_avg: typing.Optional[float] = None
"""Population average of the previous group (if any)."""
bits: float = None
@@ -74,6 +76,8 @@ class BitCountingStats(AvgStdevStats):
return
if self.max_value <= 0.0:
raise ValueError(f"Invalid max value: {self!r}")
+ max_value = self.max_value / self.unit
+ avg = self.avg / self.unit
# Length of the sequence must be also counted in bits,
# otherwise the message would not be decodable.
# Model: probability of k samples is 1/k - 1/(k+1) == 1/k/(k+1)
@@ -82,36 +86,37 @@ class BitCountingStats(AvgStdevStats):
if self.prev_avg is None:
# Avg is considered to be uniformly distributed
# from zero to max_value.
- self.bits += math.log(self.max_value + 1.0, 2)
+ self.bits += math.log(max_value + 1, 2)
else:
# Opposite triangle distribution with minimum.
- self.bits += math.log(
- (self.max_value * (self.max_value + 1))
- / (abs(self.avg - self.prev_avg) + 1),
- 2,
- )
+ prev_avg = self.prev_avg / self.unit
+ norm = prev_avg * prev_avg
+ norm -= (prev_avg - 1) * max_value
+ norm += max_value * max_value / 2
+ self.bits -= math.log((abs(avg - prev_avg) + 1) / norm, 2)
if self.size < 2:
return
+ stdev = self.stdev / self.unit
# Stdev is considered to be uniformly distributed
# from zero to max_value. That is quite a bad expectation,
# but resilient to negative samples etc.
- self.bits += math.log(self.max_value + 1.0, 2)
+ self.bits += math.log(max_value + 1, 2)
# Now we know the samples lie on sphere in size-1 dimensions.
# So it is (size-2)-sphere, with radius^2 == stdev^2 * size.
# https://en.wikipedia.org/wiki/N-sphere
sphere_area_ln = math.log(2)
- sphere_area_ln += math.log(math.pi) * ((self.size - 1) / 2.0)
- sphere_area_ln -= math.lgamma((self.size - 1) / 2.0)
- sphere_area_ln += math.log(self.stdev + 1.0) * (self.size - 2)
- sphere_area_ln += math.log(self.size) * ((self.size - 2) / 2.0)
+ sphere_area_ln += math.log(math.pi) * ((self.size - 1) / 2)
+ sphere_area_ln -= math.lgamma((self.size - 1) / 2)
+ sphere_area_ln += math.log(stdev + 1) * (self.size - 2)
+ sphere_area_ln += math.log(self.size) * ((self.size - 2) / 2)
self.bits += sphere_area_ln / math.log(2)
- # TODO: Rename, so pylint stops complaining about signature change.
@classmethod
- def for_runs(
+ def for_runs_and_params(
cls,
runs: typing.Iterable[typing.Union[float, AvgStdevStats]],
max_value: float,
+ unit: float = 1.0,
prev_avg: typing.Optional[float] = None,
):
"""Return new stats instance describing the sequence of runs.
@@ -131,9 +136,11 @@ class BitCountingStats(AvgStdevStats):
:param runs: Sequence of data to describe by the new metadata.
:param max_value: Maximal expected value.
+ :param unit: Typical resolution of the values.
:param prev_avg: Population average of the previous group, if any.
:type runs: Iterable[Union[float, AvgStdevStats]]
:type max_value: Union[float, NoneType]
+ :type unit: float
:type prev_avg: Union[float, NoneType]
:returns: The new stats instance.
:rtype: cls
@@ -144,6 +151,7 @@ class BitCountingStats(AvgStdevStats):
avg=asd.avg,
stdev=asd.stdev,
max_value=max_value,
+ unit=unit,
prev_avg=prev_avg,
)
return ret_obj
diff --git a/resources/libraries/python/jumpavg/classify.py b/resources/libraries/python/jumpavg/classify.py
index 87d2502037..cc3cdcceed 100644
--- a/resources/libraries/python/jumpavg/classify.py
+++ b/resources/libraries/python/jumpavg/classify.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022 Cisco and/or its affiliates.
+# Copyright (c) 2023 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
@@ -13,21 +13,23 @@
"""Module holding the classify function
-Classification os one of primary purposes of this package.
+Classification is one of primary purposes of this package.
Minimal message length principle is used
for grouping results into the list of groups,
assuming each group is a population of different Gaussian distribution.
"""
-import typing
+from typing import Iterable, Optional, Union
-from .AvgStdevStats import AvgStdevStats
-from .BitCountingGroupList import BitCountingGroupList
+from .avg_stdev_stats import AvgStdevStats
+from .bit_counting_group_list import BitCountingGroupList
def classify(
- values: typing.Iterable[typing.Union[float, typing.Iterable[float]]]
+ values: Iterable[Union[float, Iterable[float]]],
+ unit: Optional[float] = None,
+ sbps: Optional[float] = None,
) -> BitCountingGroupList:
"""Return the values in groups of optimal bit count.
@@ -38,12 +40,27 @@ def classify(
Internally, such sequence is replaced by AvgStdevStats
after maximal value is found.
+ If the values are smaller than expected (below one unit),
+ the underlying assumptions break down and the classification is wrong.
+ Use the "unit" parameter to hint at what the input resolution is.
+
+ If the correct value of unit is not known beforehand,
+ the argument "sbps" (Significant Bits Per Sample) can be used
+ to set unit such that maximal sample value is this many ones in binary.
+ If neither "unit" nor "sbps" are given, "sbps" of 12 is used by default.
+
:param values: Sequence of runs to classify.
+ :param unit: Typical resolution of the values.
+ Zero or None means no unit given.
+ :param sbps: Significant Bits Per Sample. None or zero means 12.
+ If unit is not set, this is used to compute unit from max sample value.
:type values: Iterable[Union[float, Iterable[float]]]
+ :type unit: Optional[float]
+ :type sbps: Optional[float]
:returns: Classified group list.
:rtype: BitCountingGroupList
"""
- processed_values = list()
+ processed_values = []
max_value = 0.0
for value in values:
if isinstance(value, (float, int)):
@@ -55,9 +72,14 @@ def classify(
if subvalue > max_value:
max_value = subvalue
processed_values.append(AvgStdevStats.for_runs(value))
+ if not unit:
+ if not sbps:
+ sbps = 12.0
+ max_in_units = pow(2.0, sbps + 1.0) - 1.0
+ unit = max_value / max_in_units
# Glist means group list (BitCountingGroupList).
- open_glists = list()
- record_glist = BitCountingGroupList(max_value=max_value)
+ open_glists = []
+ record_glist = BitCountingGroupList(max_value=max_value, unit=unit)
for value in processed_values:
new_open_glist = record_glist.copy_fast().append_group_of_runs([value])
record_glist = new_open_glist
@@ -68,9 +90,7 @@ def classify(
open_glists.append(new_open_glist)
previous_average = record_glist[0].stats.avg
for group in record_glist:
- if group.stats.avg == previous_average:
- group.comment = "normal"
- elif group.stats.avg < previous_average:
+ if group.stats.avg < previous_average:
group.comment = "regression"
elif group.stats.avg > previous_average:
group.comment = "progression"