aboutsummaryrefslogtreecommitdiffstats
path: root/resources/libraries/python/jumpavg
diff options
context:
space:
mode:
Diffstat (limited to 'resources/libraries/python/jumpavg')
-rw-r--r--resources/libraries/python/jumpavg/__init__.py10
-rw-r--r--resources/libraries/python/jumpavg/avg_stdev_stats.py (renamed from resources/libraries/python/jumpavg/AvgStdevStats.py)58
-rw-r--r--resources/libraries/python/jumpavg/bit_counting_group.py (renamed from resources/libraries/python/jumpavg/BitCountingGroup.py)146
-rw-r--r--resources/libraries/python/jumpavg/bit_counting_group_list.py (renamed from resources/libraries/python/jumpavg/BitCountingGroupList.py)140
-rw-r--r--resources/libraries/python/jumpavg/bit_counting_stats.py (renamed from resources/libraries/python/jumpavg/BitCountingStats.py)131
-rw-r--r--resources/libraries/python/jumpavg/classify.py78
6 files changed, 281 insertions, 282 deletions
diff --git a/resources/libraries/python/jumpavg/__init__.py b/resources/libraries/python/jumpavg/__init__.py
index 4fa696c538..7f63b5ee39 100644
--- a/resources/libraries/python/jumpavg/__init__.py
+++ b/resources/libraries/python/jumpavg/__init__.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021 Cisco and/or its affiliates.
+# Copyright (c) 2023 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
@@ -15,8 +15,8 @@
__init__ file for "jumpavg" Python package.
"""
-from .AvgStdevStats import AvgStdevStats
-from .BitCountingStats import BitCountingStats
-from .BitCountingGroup import BitCountingGroup
-from .BitCountingGroupList import BitCountingGroupList
+from .avg_stdev_stats import AvgStdevStats
+from .bit_counting_stats import BitCountingStats
+from .bit_counting_group import BitCountingGroup
+from .bit_counting_group_list import BitCountingGroupList
from .classify import classify
diff --git a/resources/libraries/python/jumpavg/AvgStdevStats.py b/resources/libraries/python/jumpavg/avg_stdev_stats.py
index 4720c10f3d..c21c50c8f8 100644
--- a/resources/libraries/python/jumpavg/AvgStdevStats.py
+++ b/resources/libraries/python/jumpavg/avg_stdev_stats.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021 Cisco and/or its affiliates.
+# Copyright (c) 2024 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
@@ -13,9 +13,12 @@
"""Module holding AvgStdevStats class."""
+import dataclasses
import math
+import typing
+@dataclasses.dataclass
class AvgStdevStats:
"""Class for statistics which include average and stdev of a group.
@@ -25,45 +28,18 @@ class AvgStdevStats:
Instances are only statistics, the data itself is stored elsewhere.
"""
- def __init__(self, size=0, avg=0.0, stdev=0.0):
- """Construct the stats object by storing the values needed.
-
- Each value has to be numeric.
- The values are not sanitized depending on size, wrong initialization
- can cause delayed math errors.
-
- :param size: Number of values participating in this group.
- :param avg: Population average of the participating sample values.
- :param stdev: Population standard deviation of the sample values.
- :type size: int
- :type avg: float
- :type stdev: float
- """
- self.size = size
- self.avg = avg
- self.stdev = stdev
-
- def __str__(self):
- """Return string with human readable description of the group.
-
- :returns: Readable description.
- :rtype: str
- """
- return f"size={self.size} avg={self.avg} stdev={self.stdev}"
-
- def __repr__(self):
- """Return string executable as Python constructor call.
-
- :returns: Executable constructor call.
- :rtype: str
- """
- return (
- f"AvgStdevStats(size={self.size!r},avg={self.avg!r}"
- f",stdev={self.stdev!r})"
- )
+ size: int = 0
+ """Number of scalar values (samples) participating in this group."""
+ avg: float = 0.0
+ """Population average of the participating sample values."""
+ stdev: float = 0.0
+ """Population standard deviation of the sample values."""
@classmethod
- def for_runs(cls, runs):
+ def for_runs(
+ cls,
+ runs: typing.Iterable[typing.Union[float, "AvgStdevStats"]],
+ ) -> "AvgStdevStats":
"""Return new stats instance describing the sequence of runs.
If you want to append data to existing stats object,
@@ -72,8 +48,8 @@ class AvgStdevStats:
Instead of a verb, "for" is used to start this method name,
to signify the result contains less information than the input data.
- Here, Run is a hypothetical abstract class, an union of float and cls.
- Defining that as a real abstract class in Python 2 is too much hassle.
+ Here, run is a hypothetical abstract class, a union of float and cls.
+ Defining that as a real abstract class in Python is too much hassle.
:param runs: Sequence of data to describe by the new metadata.
:type runs: Iterable[Union[float, cls]]
@@ -97,6 +73,8 @@ class AvgStdevStats:
run_size = run.size
run_avg = run.avg
run_stdev = run.stdev
+ if run_size < 1:
+ continue
old_total_size = total_size
delta = run_avg - total_avg
total_size += run_size
diff --git a/resources/libraries/python/jumpavg/BitCountingGroup.py b/resources/libraries/python/jumpavg/bit_counting_group.py
index f1bdc502fd..22c9337532 100644
--- a/resources/libraries/python/jumpavg/BitCountingGroup.py
+++ b/resources/libraries/python/jumpavg/bit_counting_group.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021 Cisco and/or its affiliates.
+# Copyright (c) 2023 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
@@ -13,14 +13,16 @@
"""Module holding BitCountingGroup class."""
-import copy
+import collections
+import dataclasses
+import typing
-from .AvgStdevStats import AvgStdevStats
-from .BitCountingStats import BitCountingStats
+from .avg_stdev_stats import AvgStdevStats
+from .bit_counting_stats import BitCountingStats
-class BitCountingGroup:
- # TODO: Inherit from collections.abc.Sequence in Python 3.
+@dataclasses.dataclass
+class BitCountingGroup(collections.abc.Sequence):
"""Group of runs which tracks bit count in an efficient manner.
This class contains methods that mutate the internal state,
@@ -38,74 +40,63 @@ class BitCountingGroup:
a method to add a single run in an efficient manner is provided.
"""
- def __init__(self, run_list=None, stats=None, bits=None,
- max_value=None, prev_avg=None, comment="unknown"):
- """Set the internal state and partially the stats.
-
- A "group" stands for an Iterable of runs, where "run" is either
- a float value, or a stats-like object (only size, avg and stdev
- are accessed). Run is a hypothetical abstract class,
- defining it in Python 2 is too much hassle.
-
- Only a copy of the run list argument value is stored in the instance,
- so it is not a problem if the value object is mutated afterwards.
+ run_list: typing.List[typing.Union[float, AvgStdevStats]]
+ """List of runs to compose into this group.
+ The init call takes ownership of the list,
+ so the caller should clone it to avoid unexpected mutations."""
+ max_value: float
+ """Maximal sample value to expect."""
+ unit: float = 1.0
+ """Typical resolution of the values"""
+ comment: str = "normal"
+ """Any string giving more info, e.g. "regression"."""
+ prev_avg: typing.Optional[float] = None
+ """Average of the previous group, if any."""
+ stats: AvgStdevStats = None
+ """Stats object used for computing bits.
+ Almost always recomputed, except when non-None in init."""
+ cached_bits: typing.Optional[float] = None
+ """Cached value of information content.
+ Noned on edit, recomputed if needed and None."""
+
+ def __post_init__(self):
+ """Recompute stats if None.
It is not verified whether the user provided values are valid,
e.g. whether the stats and bits values reflect the runs.
-
- :param run_list: List of run to compose into this group. Default: empty.
- :param stats: Stats object used for computing bits.
- :param bits: Cached value of information content.
- :param max_value: Maximal sample value to be used for computing.
- :param prev_avg: Average of the previous group, affects bits.
- :param comment: Any string giving more info, e.g. "regression".
- :type run_list: Iterable[Run]
- :type stats: Optional[AvgStdevStats]
- :type bits: Optional[float]
- :type max_value: float
- :type prev_avg: Optional[float]
- :type comment: str
"""
- self.run_list = copy.deepcopy(run_list) if run_list else list()
- self.stats = stats
- self.cached_bits = bits
- self.max_value = max_value
- self.prev_avg = prev_avg
- self.comment = comment
if self.stats is None:
- self.stats = AvgStdevStats.for_runs(self.run_list)
-
- def __str__(self):
- """Return string with human readable description of the group.
+ self.stats = AvgStdevStats.for_runs(runs=self.run_list)
- :returns: Readable description.
- :rtype: str
- """
- return f"stats={self.stats} bits={self.cached_bits}"
+ @property
+ def bits(self) -> float:
+ """Return overall bit content of the group.
- def __repr__(self):
- """Return string executable as Python constructor call.
+ If not cached, compute from stats and cache.
- :returns: Executable constructor call.
- :rtype: str
+ :returns: The overall information content in bits.
+ :rtype: float
"""
- return (
- f"BitCountingGroup(run_list={self.run_list!r},stats={self.stats!r}"
- f",bits={self.cached_bits!r},max_value={self.max_value!r}"
- f",prev_avg={self.prev_avg!r},comment={self.comment!r})"
- )
+ if self.cached_bits is None:
+ self.cached_bits = BitCountingStats.for_runs_and_params(
+ runs=[self.stats],
+ max_value=self.max_value,
+ unit=self.unit,
+ prev_avg=self.prev_avg,
+ ).bits
+ return self.cached_bits
- def __getitem__(self, index):
+ def __getitem__(self, index: int) -> typing.Union[float, AvgStdevStats]:
"""Return the run at the index.
:param index: Index of the run to return.
:type index: int
:returns: The run at the index.
- :rtype: Run
+ :rtype: typing.Union[float, AvgStdevStats]
"""
return self.run_list[index]
- def __len__(self):
+ def __len__(self) -> int:
"""Return the number of runs in the group.
:returns: The Length of run_list.
@@ -113,39 +104,36 @@ class BitCountingGroup:
"""
return len(self.run_list)
- def copy(self):
+ def copy(self) -> "BitCountingGroup":
"""Return a new instance with copied internal state.
+ Stats are preserved to avoid re-computation.
+ As both float and AvgStdevStats are effectively immutable,
+ only a shallow copy of the runs list is performed.
+
:returns: The copied instance.
:rtype: BitCountingGroup
"""
stats = AvgStdevStats.for_runs([self.stats])
return self.__class__(
- run_list=self.run_list, stats=stats, bits=self.cached_bits,
- max_value=self.max_value, prev_avg=self.prev_avg,
- comment=self.comment)
-
- @property
- def bits(self):
- """Return overall bit content of the group list.
-
- If not cached, compute from stats and cache.
-
- :returns: The overall information content in bits.
- :rtype: float
- """
- if self.cached_bits is None:
- self.cached_bits = BitCountingStats.for_runs(
- [self.stats], self.max_value, self.prev_avg).bits
- return self.cached_bits
+ run_list=list(self.run_list),
+ stats=stats,
+ cached_bits=self.cached_bits,
+ max_value=self.max_value,
+ unit=self.unit,
+ prev_avg=self.prev_avg,
+ comment=self.comment,
+ )
- def append(self, run):
+ def append(
+ self, run: typing.Union[float, AvgStdevStats]
+ ) -> "BitCountingGroup":
"""Mutate to add the new run, return self.
Stats are updated, but old bits value is deleted from cache.
:param run: The run value to add to the group.
- :type value: Run
+ :type run: typing.Union[float, AvgStdevStats]
:returns: The updated self.
:rtype: BitCountingGroup
"""
@@ -154,7 +142,9 @@ class BitCountingGroup:
self.cached_bits = None
return self
- def extend(self, runs):
+ def extend(
+ self, runs: typing.Iterable[typing.Union[float, AvgStdevStats]]
+ ) -> "BitCountingGroup":
"""Mutate to add the new runs, return self.
This is saves small amount of computation
@@ -163,7 +153,7 @@ class BitCountingGroup:
Stats are updated, but old bits value is deleted from cache.
:param runs: The runs to add to the group.
- :type value: Iterable[Run]
+ :type runs: typing.Iterable[typing.Union[float, AvgStdevStats]]
:returns: The updated self.
:rtype: BitCountingGroup
"""
diff --git a/resources/libraries/python/jumpavg/BitCountingGroupList.py b/resources/libraries/python/jumpavg/bit_counting_group_list.py
index 6a1c86baf2..e4d33b53a2 100644
--- a/resources/libraries/python/jumpavg/BitCountingGroupList.py
+++ b/resources/libraries/python/jumpavg/bit_counting_group_list.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022 Cisco and/or its affiliates.
+# Copyright (c) 2023 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
@@ -13,13 +13,16 @@
"""Module holding BitCountingGroupList class."""
-import copy
+import collections
+import dataclasses
+import typing
-from .BitCountingGroup import BitCountingGroup
+from .avg_stdev_stats import AvgStdevStats # Just for type hints.
+from .bit_counting_group import BitCountingGroup
-class BitCountingGroupList:
- # TODO: Inherit from collections.abc.Sequence in Python 3.
+@dataclasses.dataclass
+class BitCountingGroupList(collections.abc.Sequence):
"""List of data groups which tracks overall bit count.
The Sequence-like access is related to the list of groups,
@@ -41,55 +44,29 @@ class BitCountingGroupList:
recalculations if the bit count is not needed.
"""
- def __init__(self, group_list=None, bits_except_last=0.0, max_value=None):
- """Set the internal state without any calculations.
-
- The group list argument is copied deeply, so it is not a problem
- if the value object is mutated afterwards.
+ max_value: float
+ """Maximal sample value to base bits computation on."""
+ unit: float = 1.0
+ """Typical resolution of the values."""
+ group_list: typing.List[BitCountingGroup] = None
+ """List of groups to compose this group list.
+ Init also accepts None standing for an empty list.
+ This class takes ownership of the list,
+ so caller of init should clone their copy to avoid unexpected mutations.
+ """
+ bits_except_last: float = 0.0
+ """Partial sum of all but one group bits."""
- A "group" stands for an Iterable of runs, where "run" is either
- a float value, or a stats-like object (only size, avg and stdev
- are accessed). Run is a hypothetical abstract class,
- defining it in Python 2 is too much hassle.
+ def __post_init__(self):
+ """Turn possible None into an empty list.
It is not verified whether the user provided values are valid,
- e.g. whether the cached bits values make sense.
-
- The max_value is required and immutable,
- it is recommended the callers find their maximum beforehand.
-
- :param group_list: List of groups to compose this group list (or empty).
- :param bits_except_last: Partial sum of all but one group bits.
- :param max_value: Maximal sample value to base bits computation on.
- :type group_list: Iterable[BitCountingGroup]
- :type bits_except_last: float
- :type max_value: float
- """
- self.group_list = copy.deepcopy(group_list) if group_list else list()
- self.bits_except_last = bits_except_last
- self.max_value = max_value
-
- def __str__(self):
- """Return string with human readable description of the group list.
-
- :returns: Readable description.
- :rtype: str
+ e.g. whether the cached bits values (and bits_except_last) make sense.
"""
- return f"group_list={self.group_list} bits={self.bits}"
-
- def __repr__(self):
- """Return string executable as Python constructor call.
+ if self.group_list is None:
+ self.group_list = []
- :returns: Executable constructor call.
- :rtype: str
- """
- return (
- f"BitCountingGroupList(group_list={self.group_list!r}"
- f",bits_except_last={self.bits_except_last!r}"
- f",max_value={self.max_value!r})"
- )
-
- def __getitem__(self, index):
+ def __getitem__(self, index: int) -> BitCountingGroup:
"""Return the group at the index.
:param index: Index of the group to return.
@@ -99,7 +76,7 @@ class BitCountingGroupList:
"""
return self.group_list[index]
- def __len__(self):
+ def __len__(self) -> int:
"""Return the length of the group list.
:returns: The Length of group_list.
@@ -107,19 +84,46 @@ class BitCountingGroupList:
"""
return len(self.group_list)
- def copy(self):
+ def copy(self) -> "BitCountingGroupList":
"""Return a new instance with copied internal state.
:returns: The copied instance.
:rtype: BitCountingGroupList
"""
return self.__class__(
- group_list=self.group_list, bits_except_last=self.bits_except_last,
- max_value=self.max_value
+ max_value=self.max_value,
+ unit=self.unit,
+ group_list=[group.copy() for group in self.group_list],
+ bits_except_last=self.bits_except_last,
+ )
+
+ def copy_fast(self) -> "BitCountingGroupList":
+ """Return a new instance with minimally copied internal state.
+
+ The assumption here is that only the last group will ever be mutated
+ (in self, probably never in the return value),
+ so all the previous groups can be "copied by reference".
+
+ :returns: The copied instance.
+ :rtype: BitCountingGroupList
+ """
+ group_list = list(self.group_list)
+ if group_list:
+ group_list[-1] = group_list[-1].copy()
+ # Further speedup is possible by keeping the last group
+ # as a singly linked (from end) list,
+ # but for CSIT sample sizes, copy of whole Python list is faster.
+ # TODO: Implement linked list as an option
+ # for users with many samples.
+ return self.__class__(
+ max_value=self.max_value,
+ unit=self.unit,
+ group_list=group_list,
+ bits_except_last=self.bits_except_last,
)
@property
- def bits(self):
+ def bits(self) -> float:
"""Return overall bit content of the group list.
:returns: The overall information content in bits.
@@ -130,12 +134,17 @@ class BitCountingGroupList:
# TODO: Is it worth to cache the overall result?
return self.bits_except_last + self.group_list[-1].bits
- def append_group_of_runs(self, runs):
+ def append_group_of_runs(
+ self,
+ runs: typing.Union[
+ BitCountingGroup, typing.List[typing.Union[float, AvgStdevStats]]
+ ],
+ ) -> "BitCountingGroupList":
"""Mutate to add a new group based on the runs, return self.
- The argument is copied before adding to the group list,
- so further edits do not affect the grup list.
- The argument can also be a group, only runs from it are used.
+ The list argument is NOT copied before adding to the group list,
+ so further edits MAY affect the group list.
+ The list from BitCountingGroup is shallow copied though.
:param runs: Runs to form the next group to be appended to self.
:type runs: Union[Iterable[Run], BitCountingGroup]
@@ -147,16 +156,23 @@ class BitCountingGroupList:
# It is faster to avoid stats recalculation.
new_group = runs.copy()
new_group.max_value = self.max_value
+ # Unit is common.
new_group.prev_avg = prev_avg
new_group.cached_bits = None
else:
new_group = BitCountingGroup(
- run_list=runs, max_value=self.max_value, prev_avg=prev_avg)
+ run_list=runs,
+ max_value=self.max_value,
+ unit=self.unit,
+ prev_avg=prev_avg,
+ )
self.bits_except_last = self.bits
self.group_list.append(new_group)
return self
- def append_run_to_to_last_group(self, run):
+ def append_run_to_to_last_group(
+ self, run: typing.Union[float, AvgStdevStats]
+ ) -> "BitCountingGroupList":
"""Mutate to add new run at the end of the last group.
Basically a one-liner, only returning group list instead of last group.
@@ -170,7 +186,9 @@ class BitCountingGroupList:
self.group_list[-1].append(run)
return self
- def extend_runs_to_last_group(self, runs):
+ def extend_runs_to_last_group(
+ self, runs: typing.Iterable[typing.Union[float, AvgStdevStats]]
+ ) -> "BitCountingGroupList":
"""Mutate to add new runs to the end of the last group.
A faster alternative to appending runs one by one in a loop.
diff --git a/resources/libraries/python/jumpavg/BitCountingStats.py b/resources/libraries/python/jumpavg/bit_counting_stats.py
index 7b5e659214..3d1cb8aef0 100644
--- a/resources/libraries/python/jumpavg/BitCountingStats.py
+++ b/resources/libraries/python/jumpavg/bit_counting_stats.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021 Cisco and/or its affiliates.
+# Copyright (c) 2023 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
@@ -13,11 +13,14 @@
"""Module holding BitCountingStats class."""
+import dataclasses
import math
+import typing
-from .AvgStdevStats import AvgStdevStats
+from .avg_stdev_stats import AvgStdevStats
+@dataclasses.dataclass
class BitCountingStats(AvgStdevStats):
"""Class for statistics which include information content of a group.
@@ -33,11 +36,22 @@ class BitCountingStats(AvgStdevStats):
Only for_runs method calls the parent implementation, without using super().
"""
- def __init__(
- self, size=0, avg=None, stdev=0.0, max_value=None, prev_avg=None):
- """Construct the stats object by computing from the values needed.
+ max_value: float = None
+ """Maximal sample value (real or estimated).
+ Default value is there just for argument ordering reasons,
+ leaving None leads to exceptions."""
+ unit: float = 1.0
+ """Typical resolution of the values."""
+ prev_avg: typing.Optional[float] = None
+ """Population average of the previous group (if any)."""
+ bits: float = None
+ """The computed information content of the group.
+ It is formally an argument to init function, just to keep repr string
+ a valid call. But the init value is ignored and always recomputed.
+ """
- The values are not sanitized, faulty callers can cause math errors.
+ def __post_init__(self):
+ """Construct the stats object by computing from the values needed.
The None values are allowed for stats for zero size data,
but such stats can report arbitrary avg and max_value.
@@ -54,91 +68,60 @@ class BitCountingStats(AvgStdevStats):
(but not with floating point mechanic).
The hope is the difference will have
no real impact on the classification procedure.
-
- :param size: Number of values participating in this group.
- :param avg: Population average of the participating sample values.
- :param stdev: Population standard deviation of the sample values.
- :param max_value: Maximal expected value.
- TODO: This might be more optimal,
- but max-invariant algorithm will be nicer.
- :param prev_avg: Population average of the previous group.
- If None, no previous average is taken into account.
- If not None, the given previous average is used to discourage
- consecutive groups with similar averages
- (opposite triangle distribution is assumed).
- :type avg: float
- :type size: int
- :type stdev: float
- :type max_value: Union[float, NoneType]
- :type prev_avg: Union[float, NoneType]
"""
- self.avg = avg
- self.size = size
- self.stdev = stdev
- self.max_value = max_value
- self.prev_avg = prev_avg
# Zero size should in principle have non-zero bits (coding zero size),
# but zero allows users to add empty groups without affecting bits.
self.bits = 0.0
if self.size < 1:
return
- if avg is None:
- raise ValueError(f"Avg is None: {self!r}")
- if max_value is None or max_value <= 0.0:
+ if self.max_value <= 0.0:
raise ValueError(f"Invalid max value: {self!r}")
+ max_value = self.max_value / self.unit
+ avg = self.avg / self.unit
# Length of the sequence must be also counted in bits,
# otherwise the message would not be decodable.
# Model: probability of k samples is 1/k - 1/(k+1) == 1/k/(k+1)
# This is compatible with zero size leading to zero bits.
- self.bits += math.log(size * (size + 1), 2)
- if prev_avg is None:
+ self.bits += math.log(self.size * (self.size + 1), 2)
+ if self.prev_avg is None:
# Avg is considered to be uniformly distributed
# from zero to max_value.
- self.bits += math.log(max_value + 1.0, 2)
+ self.bits += math.log(max_value + 1, 2)
else:
# Opposite triangle distribution with minimum.
- self.bits += math.log(
- max_value * (max_value + 1) / (abs(avg - prev_avg) + 1), 2)
+ prev_avg = self.prev_avg / self.unit
+ norm = prev_avg * prev_avg
+ norm -= (prev_avg - 1) * max_value
+ norm += max_value * max_value / 2
+ self.bits -= math.log((abs(avg - prev_avg) + 1) / norm, 2)
if self.size < 2:
return
- # Stdev is considered to be uniformly distributed
- # from zero to max_value. That is quite a bad expectation,
- # but resilient to negative samples etc.
- self.bits += math.log(max_value + 1.0, 2)
+ stdev = self.stdev / self.unit
+ # Stdev can be anything between zero and max value.
+ # For size==2, sphere surface is 2 points regardless of radius,
+ # we need to penalize large stdev already when encoding the stdev.
+ # The simplest way is to use the same distribution as with size...
+ self.bits += math.log((stdev + 1) * (stdev + 2), 2)
+ # .. just with added normalization from the max value cut-off.
+ self.bits += math.log(1 - 1 / (max_value + 2), 2)
# Now we know the samples lie on sphere in size-1 dimensions.
# So it is (size-2)-sphere, with radius^2 == stdev^2 * size.
# https://en.wikipedia.org/wiki/N-sphere
- sphere_area_ln = math.log(2) + math.log(math.pi) * ((size - 1) / 2.0)
- sphere_area_ln -= math.lgamma((size - 1) / 2.0)
- sphere_area_ln += math.log(stdev + 1.0) * (size - 2)
- sphere_area_ln += math.log(size) * ((size - 2) / 2.0)
+ sphere_area_ln = math.log(2)
+ sphere_area_ln += math.log(math.pi) * ((self.size - 1) / 2)
+ sphere_area_ln -= math.lgamma((self.size - 1) / 2)
+ sphere_area_ln += math.log(stdev + 1) * (self.size - 2)
+ sphere_area_ln += math.log(self.size) * ((self.size - 2) / 2)
self.bits += sphere_area_ln / math.log(2)
- def __str__(self):
- """Return string with human readable description of the group.
-
- :returns: Readable description.
- :rtype: str
- """
- return (
- f"size={self.size} avg={self.avg} stdev={self.stdev}"
- f" bits={self.bits}"
- )
-
- def __repr__(self):
- """Return string executable as Python constructor call.
-
- :returns: Executable constructor call.
- :rtype: str
- """
- return (
- f"BitCountingStats(size={self.size!r},avg={self.avg!r}"
- f",stdev={self.stdev!r},max_value={self.max_value!r}"
- f",prev_avg={self.prev_avg!r})"
- )
-
@classmethod
- def for_runs(cls, runs, max_value=None, prev_avg=None):
+ def for_runs_and_params(
+ cls,
+ runs: typing.Iterable[typing.Union[float, AvgStdevStats]],
+ max_value: float,
+ unit: float = 1.0,
+ prev_avg: typing.Optional[float] = None,
+ ):
"""Return new stats instance describing the sequence of runs.
If you want to append data to existing stats object,
@@ -156,14 +139,22 @@ class BitCountingStats(AvgStdevStats):
:param runs: Sequence of data to describe by the new metadata.
:param max_value: Maximal expected value.
+ :param unit: Typical resolution of the values.
:param prev_avg: Population average of the previous group, if any.
:type runs: Iterable[Union[float, AvgStdevStats]]
:type max_value: Union[float, NoneType]
+ :type unit: float
:type prev_avg: Union[float, NoneType]
:returns: The new stats instance.
:rtype: cls
"""
asd = AvgStdevStats.for_runs(runs)
- ret_obj = cls(size=asd.size, avg=asd.avg, stdev=asd.stdev,
- max_value=max_value, prev_avg=prev_avg)
+ ret_obj = cls(
+ size=asd.size,
+ avg=asd.avg,
+ stdev=asd.stdev,
+ max_value=max_value,
+ unit=unit,
+ prev_avg=prev_avg,
+ )
return ret_obj
diff --git a/resources/libraries/python/jumpavg/classify.py b/resources/libraries/python/jumpavg/classify.py
index 252c71e8d5..cc3cdcceed 100644
--- a/resources/libraries/python/jumpavg/classify.py
+++ b/resources/libraries/python/jumpavg/classify.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021 Cisco and/or its affiliates.
+# Copyright (c) 2023 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
@@ -13,32 +13,54 @@
"""Module holding the classify function
-Classification os one of primary purposes of this package.
+Classification is one of primary purposes of this package.
Minimal message length principle is used
for grouping results into the list of groups,
assuming each group is a population of different Gaussian distribution.
"""
-from .AvgStdevStats import AvgStdevStats
-from .BitCountingGroupList import BitCountingGroupList
+from typing import Iterable, Optional, Union
+from .avg_stdev_stats import AvgStdevStats
+from .bit_counting_group_list import BitCountingGroupList
-def classify(values):
+
+def classify(
+ values: Iterable[Union[float, Iterable[float]]],
+ unit: Optional[float] = None,
+ sbps: Optional[float] = None,
+) -> BitCountingGroupList:
"""Return the values in groups of optimal bit count.
Here, a value is either a float, or an iterable of floats.
Such iterables represent an undivisible sequence of floats.
+ Int is accepted anywhere instead of float.
Internally, such sequence is replaced by AvgStdevStats
after maximal value is found.
+ If the values are smaller than expected (below one unit),
+ the underlying assumptions break down and the classification is wrong.
+ Use the "unit" parameter to hint at what the input resolution is.
+
+ If the correct value of unit is not known beforehand,
+ the argument "sbps" (Significant Bits Per Sample) can be used
+ to set unit such that maximal sample value is this many ones in binary.
+ If neither "unit" nor "sbps" are given, "sbps" of 12 is used by default.
+
:param values: Sequence of runs to classify.
+ :param unit: Typical resolution of the values.
+ Zero and None means no unit given.
+ :param sbps: Significant Bits Per Sample. None or zero means 12.
+ If unit is not set, this is used to compute unit from max sample value.
:type values: Iterable[Union[float, Iterable[float]]]
+ :type unit: Optional[float]
+ :type sbps: Optional[float]
:returns: Classified group list.
:rtype: BitCountingGroupList
"""
- processed_values = list()
+ processed_values = []
max_value = 0.0
for value in values:
if isinstance(value, (float, int)):
@@ -50,27 +72,27 @@ def classify(values):
if subvalue > max_value:
max_value = subvalue
processed_values.append(AvgStdevStats.for_runs(value))
- open_at = list()
- closed_before = [BitCountingGroupList(max_value=max_value)]
- for index, value in enumerate(processed_values):
- newly_open = closed_before[index].copy()
- newly_open.append_group_of_runs([value])
- open_at.append(newly_open)
- record_group_list = newly_open
- for previous_index, old_open in enumerate(open_at[:index]):
- new_open = old_open.copy().append_run_to_to_last_group(value)
- open_at[previous_index] = new_open
- if new_open.bits < record_group_list.bits:
- record_group_list = new_open
- closed_before.append(record_group_list)
- partition = closed_before[-1]
- previous_average = partition[0].stats.avg
- for group in partition:
- if group.stats.avg == previous_average:
- group.comment = u"normal"
- elif group.stats.avg < previous_average:
- group.comment = u"regression"
+ if not unit:
+ if not sbps:
+ sbps = 12.0
+ max_in_units = pow(2.0, sbps + 1.0) - 1.0
+ unit = max_value / max_in_units
+ # Glist means group list (BitCountingGroupList).
+ open_glists = []
+ record_glist = BitCountingGroupList(max_value=max_value, unit=unit)
+ for value in processed_values:
+ new_open_glist = record_glist.copy_fast().append_group_of_runs([value])
+ record_glist = new_open_glist
+ for old_open_glist in open_glists:
+ old_open_glist.append_run_to_to_last_group(value)
+ if old_open_glist.bits < record_glist.bits:
+ record_glist = old_open_glist
+ open_glists.append(new_open_glist)
+ previous_average = record_glist[0].stats.avg
+ for group in record_glist:
+ if group.stats.avg < previous_average:
+ group.comment = "regression"
elif group.stats.avg > previous_average:
- group.comment = u"progression"
+ group.comment = "progression"
previous_average = group.stats.avg
- return partition
+ return record_glist