about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Vratko Polak <vrpolak@cisco.com> 2023-07-13 14:19:37 +0200
committer: Tibor Frank <tifrank@cisco.com> 2023-07-14 08:16:52 +0000
commit: 942a580ecb880a0a8b886bc247b40ca43c96abb9 (patch)
tree: 449170b71d47de1976b2e40e9202ec7d184ef9f4
parent: 99c97de874f20feaa597d3b40e795e5ce854f60b (diff)
fix(jumpavg): penalize stdev also for size=2
This fix is mainly needed for bisection using PDR values. The impact on trending is smaller but still beneficial, as this fix should reduce the number of false anomalies for two-band and other unstable tests.

+ Update metadata for the 0.4.1 release to PyPI.

Change-Id: Iabab4df50f4c4ad034362820904a237c507fa710
Signed-off-by: Vratko Polak <vrpolak@cisco.com>
-rw-r--r-- PyPI/jumpavg/README.md 2
-rw-r--r-- PyPI/jumpavg/pyproject.toml 2
-rw-r--r-- resources/libraries/python/jumpavg/bit_counting_stats.py 11
3 files changed, 10 insertions, 5 deletions
diff --git a/PyPI/jumpavg/README.md b/PyPI/jumpavg/README.md
index e3cae0d924..e93e4dc13b 100644
--- a/PyPI/jumpavg/README.md
+++ b/PyPI/jumpavg/README.md
@@ -23,6 +23,8 @@ TODO.
TODO: Move into a separate file?
++ 0.4.1: Fixed bug of not penalizing large stdev enough (at all for size 2 stats).
+
+ 0.4.0: Added "unit" and "sbps" parameters so information content
is reasonable even if sample values are below one.
diff --git a/PyPI/jumpavg/pyproject.toml b/PyPI/jumpavg/pyproject.toml
index 275482ecad..ee6b4cabed 100644
--- a/PyPI/jumpavg/pyproject.toml
+++ b/PyPI/jumpavg/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "jumpavg"
-version = "0.4.0"
+version = "0.4.1"
description = "Library for locating changes in time series by grouping results."
authors = [
{ name = "Cisco Systems Inc. and/or its affiliates", email = "csit-dev@lists.fd.io" },
diff --git a/resources/libraries/python/jumpavg/bit_counting_stats.py b/resources/libraries/python/jumpavg/bit_counting_stats.py
index caece2c8ca..3d1cb8aef0 100644
--- a/resources/libraries/python/jumpavg/bit_counting_stats.py
+++ b/resources/libraries/python/jumpavg/bit_counting_stats.py
@@ -97,10 +97,13 @@ class BitCountingStats(AvgStdevStats):
if self.size < 2:
return
stdev = self.stdev / self.unit
- # Stdev is considered to be uniformly distributed
- # from zero to max_value. That is quite a bad expectation,
- # but resilient to negative samples etc.
- self.bits += math.log(max_value + 1, 2)
+ # Stdev can be anything between zero and max value.
+ # For size==2, sphere surface is 2 points regardless of radius,
+ # we need to penalize large stdev already when encoding the stdev.
+ # The simplest way is to use the same distribution as with size...
+ self.bits += math.log((stdev + 1) * (stdev + 2), 2)
+ # .. just with added normalization from the max value cut-off.
+ self.bits += math.log(1 - 1 / (max_value + 2), 2)
# Now we know the samples lie on sphere in size-1 dimensions.
# So it is (size-2)-sphere, with radius^2 == stdev^2 * size.
# https://en.wikipedia.org/wiki/N-sphere