diff options
author | Vratko Polak <vrpolak@cisco.com> | 2018-06-28 13:07:14 +0200 |
---|---|---|
committer | Tibor Frank <tifrank@cisco.com> | 2018-06-28 12:07:43 +0000 |
commit | a60792260765e15cebae10d2c54fb2c3aef75d77 (patch) | |
tree | cc32cf1641ce2f1f0e80b160530c9e93cbd30884 | |
parent | fe757aee086b669ab042a9216f7e01a2d9a7ffcb (diff) |
Fix jumpavg: No negative variance from rounding
The algorithm for computing stdev has been changed
to a more stable one, based on Welford's algorithm.
Change-Id: I51e02d9c5c26cda0d4e998381d5011aa793e6483
Signed-off-by: Vratko Polak <vrpolak@cisco.com>
-rw-r--r-- | PyPI/jumpavg/jumpavg/AvgStdevMetadataFactory.py | 35 |
1 file changed, 19 insertions, 16 deletions
diff --git a/PyPI/jumpavg/jumpavg/AvgStdevMetadataFactory.py b/PyPI/jumpavg/jumpavg/AvgStdevMetadataFactory.py index 6d2e967a88..25bc600aeb 100644 --- a/PyPI/jumpavg/jumpavg/AvgStdevMetadataFactory.py +++ b/PyPI/jumpavg/jumpavg/AvgStdevMetadataFactory.py @@ -30,22 +30,25 @@ class AvgStdevMetadataFactory(object): :returns: The metadata matching the values. :rtype: AvgStdevMetadata """ - sum_0 = 0 - sum_1 = 0.0 - sum_2 = 0.0 + # Using Welford method to be more resistant to rounding errors. + # Adapted from code for sample standard deviation at: + # https://www.johndcook.com/blog/standard_deviation/ + # The logic of plus operator is taken from + # https://www.johndcook.com/blog/skewness_kurtosis/ + size = 0 + avg = 0.0 + moment_2 = 0.0 for value in values: - if isinstance(value, AvgStdevMetadata): - sum_0 += value.size - sum_1 += value.avg * value.size - sum_2 += value.stdev * value.stdev * value.size - sum_2 += value.avg * value.avg * value.size - else: # The value is assumed to be float. - sum_0 += 1 - sum_1 += value - sum_2 += value * value - if sum_0 < 1: + if not isinstance(value, AvgStdevMetadata): + value = AvgStdevMetadata(size=1, avg=value) + old_size = size + delta = value.avg - avg + size += value.size + avg += delta * value.size / size + moment_2 += value.stdev * value.stdev * value.size + moment_2 += delta * delta * old_size * value.size / size + if size < 1: return AvgStdevMetadata() - avg = sum_1 / sum_0 - stdev = math.sqrt(sum_2 / sum_0 - avg * avg) - ret_obj = AvgStdevMetadata(size=sum_0, avg=avg, stdev=stdev) + stdev = math.sqrt(moment_2 / size) + ret_obj = AvgStdevMetadata(size=size, avg=avg, stdev=stdev) return ret_obj |