diff options
author | Vratko Polak <vrpolak@cisco.com> | 2018-06-08 18:07:35 +0200 |
---|---|---|
committer | Tibor Frank <tifrank@cisco.com> | 2018-06-11 08:30:21 +0000 |
commit | beeb2acb9ac153eaa54983bea46a76d596168965 (patch) | |
tree | 0465617b135a2e64693265969c48ff466db3d287 /resources/tools/presentation/new/jumpavg/BitCountingMetadata.py | |
parent | 3dcef45002a1b82c4503ec590d680950930fa193 (diff) |
CSIT-1110: Integrate anomaly detection into PAL
+ Keep the original detection,
+ add the new one as subdirectory
(both in source and in rendered tree).
- The new detection is not rebased over "Add dpdk mrr tests to trending".
New detection features:
+ Do not remove (nor detect) outliers.
+ Trend line shows the constant average within a group.
+ Anomaly circles are placed at the changed average.
+ Small bias against too similar averages.
+ Should be ready for moving the detection library out to pip.
Change-Id: I7ab1a92b79eeeed53ba65a071b1305e927816a89
Signed-off-by: Vratko Polak <vrpolak@cisco.com>
Diffstat (limited to 'resources/tools/presentation/new/jumpavg/BitCountingMetadata.py')
-rw-r--r-- | resources/tools/presentation/new/jumpavg/BitCountingMetadata.py | 102 |
1 files changed, 102 insertions, 0 deletions
# Copyright (c) 2018 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math

from AvgStdevMetadata import AvgStdevMetadata


class BitCountingMetadata(AvgStdevMetadata):
    """Class for metadata which includes information content.

    On top of what the parent constructor stores (this class reads
    size, avg and stdev back from self), instances carry:
    max_value and prev_avg as given to the constructor,
    and bits, the estimated number of bits needed to encode the group.
    """

    def __init__(self, max_value, size=0, avg=0.0, stdev=0.0, prev_avg=None):
        """Construct the metadata by computing from the values needed.

        The bit count is not real, as that would depend on numeric precision
        (number of significant bits in values).
        The difference is assumed to be constant per value,
        which is consistent with Gauss distribution
        (but not with floating point mechanic).
        The hope is the difference will have
        no real impact on the classification procedure.

        The bit count is accumulated in stages:
        an empty group costs zero bits;
        a non-empty group pays for its length and its average;
        a group of two or more values additionally pays for its stdev
        and for the position of the samples on the sphere
        determined by size, avg and stdev.

        All divisions are performed in floating point,
        so integer arguments give the same result as equal floats
        (also under Python 2).

        :param max_value: Maximal expected value.
            TODO: This might be more optimal,
            but max-invariant algorithm will be nicer.
        :param size: Number of values participating in this group.
        :param avg: Population average of the participating sample values.
        :param stdev: Population standard deviation of the sample values.
        :param prev_avg: Population average of the previous group.
            If None, no previous average is taken into account.
            If not None, the given previous average is used to discourage
            consecutive groups with similar averages
            (opposite triangle distribution is assumed).
        :type max_value: float
        :type size: int
        :type avg: float
        :type stdev: float
        :type prev_avg: float or None
        """
        super(BitCountingMetadata, self).__init__(size, avg, stdev)
        self.max_value = max_value
        self.prev_avg = prev_avg
        self.bits = 0.0
        if self.size < 1:
            # Empty group carries no information.
            return
        # Length of the sequence must be also counted in bits,
        # otherwise the message would not be decodable.
        # Model: probability of k samples is 1/k - 1/(k+1)
        # == 1/k/(k+1)
        self.bits += math.log(size * (size + 1), 2)
        if prev_avg is None:
            # Avg is considered to be uniformly distributed
            # from zero to max_value.
            self.bits += math.log(max_value + 1.0, 2)
        else:
            # Opposite triangle distribution with minimum.
            # The closer avg is to prev_avg, the more bits are charged.
            # Float literals keep this a true division even if all
            # arguments are integers (Python 2 would floor the ratio).
            self.bits += math.log(
                max_value * (max_value + 1.0)
                / (abs(avg - prev_avg) + 1.0), 2)
        if self.size < 2:
            # Single value is fully described by size and avg.
            return
        # Stdev is considered to be uniformly distributed
        # from zero to max_value. That is quite a bad expectation,
        # but resilient to negative samples etc.
        self.bits += math.log(max_value + 1.0, 2)
        # Now we know the samples lie on sphere in size-1 dimensions.
        # So it is (size-2)-sphere, with radius^2 == stdev^2 * size.
        # https://en.wikipedia.org/wiki/N-sphere
        # The area is computed as a natural logarithm, term by term:
        # ln(2 * pi^((size-1)/2)) - ln(Gamma((size-1)/2))
        # + (size-2) * ln(radius), where the radius term is split
        # into its stdev and sqrt(size) factors.
        sphere_area_ln = math.log(2) + math.log(math.pi) * ((size - 1) / 2.0)
        sphere_area_ln -= math.lgamma((size - 1) / 2.0)
        # One is added to stdev, so zero stdev does not break the logarithm.
        sphere_area_ln += math.log(stdev + 1.0) * (size - 2)
        sphere_area_ln += math.log(size) * ((size - 2) / 2.0)
        # Convert from natural logarithm to bits.
        self.bits += sphere_area_ln / math.log(2)

    def __str__(self):
        """Return string with human readable description of the group.

        The description lists size, avg, stdev and bits.

        :returns: Readable description.
        :rtype: str
        """
        return "size={size} avg={avg} stdev={stdev} bits={bits}".format(
            size=self.size, avg=self.avg, stdev=self.stdev, bits=self.bits)

    def __repr__(self):
        """Return string executable as Python constructor call.

        Only constructor arguments are included,
        bits would be recomputed by the constructor.

        :returns: Executable constructor call.
        :rtype: str
        """
        return ("BitCountingMetadata(max_value={max_value},size={size}," +
                "avg={avg},stdev={stdev},prev_avg={prev_avg})").format(
                    max_value=self.max_value, size=self.size, avg=self.avg,
                    stdev=self.stdev, prev_avg=self.prev_avg)