aboutsummaryrefslogtreecommitdiffstats
path: root/resources/libraries/python/jumpavg/avg_stdev_stats.py
diff options
context:
space:
mode:
authorVratko Polak <vrpolak@cisco.com>2023-06-02 14:44:47 +0200
committerTibor Frank <tifrank@cisco.com>2023-06-07 05:53:55 +0000
commit079c390e0903a98182781ff5c2af2bba9902b4ed (patch)
tree98116eea60af0ed530548bb52dff126aef0dc9e9 /resources/libraries/python/jumpavg/avg_stdev_stats.py
parent2c9b2a4298cac5cc4d6ca60cb1da8bd72ec23c37 (diff)
feat(jumpavg): support small values via unit param
Previously, Jumpavg was known to give wrong results when the data contains values of order one or smaller. This change introduces a new "unit" parameter, which changes how the information content is calculated. For example, if the data values are multiples of 0.01, the unit parameter should be set to 0.01 to compensate. For callers not knowing their correct unit value, another parameter is introduced, called "sbps" (meaning Significant Bits Per Sample). A binary integer number with this many ones is how many units the maximal sample should be. This way jumpavg computes the corresponding "unit" value to use. If neither "unit" nor "sbps" is given, the "sbps" value of 12 is applied. + Rename files to conform to snake_style naming. + Fix normalization for the "opposite triangle" distribution. + Simplify logic, all groups now start as "normal", not "unknown". + Minor style improvements as suggested by pylint. + From the user perspective, this change should be backward compatible. - The normalization fix is a behavior change, but it is a bugfix and the new behavior should be better. Change-Id: I5a5ca11757f087fff13faf1d0b8e34a741400258 Signed-off-by: Vratko Polak <vrpolak@cisco.com>
Diffstat (limited to 'resources/libraries/python/jumpavg/avg_stdev_stats.py')
-rw-r--r--resources/libraries/python/jumpavg/avg_stdev_stats.py89
1 files changed, 89 insertions, 0 deletions
diff --git a/resources/libraries/python/jumpavg/avg_stdev_stats.py b/resources/libraries/python/jumpavg/avg_stdev_stats.py
new file mode 100644
index 0000000000..3d6a834919
--- /dev/null
+++ b/resources/libraries/python/jumpavg/avg_stdev_stats.py
@@ -0,0 +1,89 @@
+# Copyright (c) 2023 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Module holding AvgStdevStats class."""
+
+import dataclasses
+import math
+import typing
+
+
@dataclasses.dataclass
class AvgStdevStats:
    """Class for statistics which include average and stdev of a group.

    Contrary to other stats types, adding values to the group
    is computationally light without any caching.

    Instances are only statistics, the data itself is stored elsewhere.
    """

    size: int = 0
    """Number of scalar values (samples) participating in this group."""
    avg: float = 0.0
    """Population average of the participating sample values."""
    stdev: float = 0.0
    """Population standard deviation of the sample values."""

    @classmethod
    def for_runs(
        cls,
        runs: typing.Iterable[typing.Union[float, "AvgStdevStats"]],
    ) -> "AvgStdevStats":
        """Return new stats instance describing the sequence of runs.

        If you want to append data to existing stats object,
        you can simply use the stats object as the first run.
        This works also when that stats object is empty (size zero),
        as runs without any samples are skipped.

        Instead of a verb, "for" is used to start this method name,
        to signify the result contains less information than the input data.

        Here, run is a hypothetical abstract class, a union of float and cls.
        Defining that as a real abstract class in Python is too much hassle.

        If the runs contain no samples at all, the default (empty)
        stats instance is returned.

        :param runs: Sequence of data to describe by the new metadata.
        :type runs: Iterable[Union[float, cls]]
        :returns: The new stats instance.
        :rtype: cls
        """
        # Using Welford method to be more resistant to rounding errors.
        # Adapted from code for sample standard deviation at:
        # https://www.johndcook.com/blog/standard_deviation/
        # The logic of plus operator is taken from
        # https://www.johndcook.com/blog/skewness_kurtosis/
        total_size = 0
        total_avg = 0.0
        # Sum of squared deviations from the running average.
        moment_2 = 0.0
        for run in runs:
            if isinstance(run, (float, int)):
                # A bare number is a single sample without any spread.
                run_size = 1
                run_avg = run
                run_stdev = 0.0
            else:
                run_size = run.size
                run_avg = run.avg
                run_stdev = run.stdev
            if run_size < 1:
                # A run without samples carries no information.
                # Skipping it also avoids division by zero total_size
                # below, when no sample has been seen yet.
                continue
            old_total_size = total_size
            delta = run_avg - total_avg
            total_size += run_size
            total_avg += delta * run_size / total_size
            # Spread inside the run itself.
            moment_2 += run_stdev * run_stdev * run_size
            # Spread caused by the run average differing from the old one.
            moment_2 += delta * delta * old_total_size * run_size / total_size
        if total_size < 1:
            # Avoid division by zero.
            return cls(size=0)
        # TODO: Is it worth tracking moment_2 instead, and compute and cache
        # stdev on demand, just to possibly save some sqrt calls?
        total_stdev = math.sqrt(moment_2 / total_size)
        return cls(size=total_size, avg=total_avg, stdev=total_stdev)