aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTibor Frank <tifrank@cisco.com>2020-05-06 16:43:58 +0200
committerTibor Frank <tifrank@cisco.com>2020-05-06 16:43:58 +0200
commit842a6bd46b2dd5479bcebb24c4ef3f42cf33477c (patch)
tree14d2a7aca2272a742251c1fd4f8ec04fb38ca06b
parent1e4c964d22678355f55dbc000e47ccf5785d5c17 (diff)
Add jumpavg
Signed-off-by: Tibor Frank <tifrank@cisco.com> Change-Id: Ide837683569d1ec0e409565282c058264695640d
-rw-r--r--resources/libraries/python/jumpavg/AvgStdevStats.py113
-rw-r--r--resources/libraries/python/jumpavg/BitCountingGroup.py173
-rw-r--r--resources/libraries/python/jumpavg/BitCountingGroupList.py185
-rw-r--r--resources/libraries/python/jumpavg/BitCountingStats.py169
-rw-r--r--resources/libraries/python/jumpavg/__init__.py22
-rw-r--r--resources/libraries/python/jumpavg/classify.py76
6 files changed, 738 insertions, 0 deletions
diff --git a/resources/libraries/python/jumpavg/AvgStdevStats.py b/resources/libraries/python/jumpavg/AvgStdevStats.py
new file mode 100644
index 0000000000..9a8decd932
--- /dev/null
+++ b/resources/libraries/python/jumpavg/AvgStdevStats.py
@@ -0,0 +1,113 @@
+# Copyright (c) 2019 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Module holding AvgStdevStats class."""
+
+import math
+
+
class AvgStdevStats:
    """Class for statistics which include average and stdev of a group.

    Contrary to other stats types, adding values to the group
    is computationally light without any caching.

    Instances are only statistics, the data itself is stored elsewhere.
    """

    def __init__(self, size=0, avg=0.0, stdev=0.0):
        """Construct the stats object by storing the values needed.

        Each value has to be numeric.
        The values are not sanitized depending on size, wrong initialization
        can cause delayed math errors.

        :param size: Number of values participating in this group.
        :param avg: Population average of the participating sample values.
        :param stdev: Population standard deviation of the sample values.
        :type size: int
        :type avg: float
        :type stdev: float
        """
        self.size = size
        self.avg = avg
        self.stdev = stdev

    def __str__(self):
        """Return string with human readable description of the group.

        :returns: Readable description.
        :rtype: str
        """
        return f"size={self.size} avg={self.avg} stdev={self.stdev}"

    def __repr__(self):
        """Return string executable as Python constructor call.

        :returns: Executable constructor call.
        :rtype: str
        """
        return (
            f"AvgStdevStats(size={self.size!r},avg={self.avg!r}"
            f",stdev={self.stdev!r})"
        )

    @classmethod
    def for_runs(cls, runs):
        """Return new stats instance describing the sequence of runs.

        If you want to append data to existing stats object,
        you can simply use the stats object as the first run.

        Instead of a verb, "for" is used to start this method name,
        to signify the result contains less information than the input data.

        Here, Run is a hypothetical abstract class, a union of float and cls.
        A run which is not a float or int is only accessed via its
        size, avg and stdev attributes.

        Runs of zero size carry no data, so they are skipped
        without reading their avg and stdev. This allows an empty stats
        object to be used as the first run.

        :param runs: Sequence of data to describe by the new metadata.
        :type runs: Iterable[Union[float, cls]]
        :returns: The new stats instance.
        :rtype: cls
        """
        # Using Welford method to be more resistant to rounding errors.
        # Adapted from code for sample standard deviation at:
        # https://www.johndcook.com/blog/standard_deviation/
        # The logic of plus operator is taken from
        # https://www.johndcook.com/blog/skewness_kurtosis/
        total_size = 0
        total_avg = 0.0
        moment_2 = 0.0
        for run in runs:
            if isinstance(run, (float, int)):
                run_size = 1
                run_avg = run
                run_stdev = 0.0
            else:
                run_size = run.size
                if not run_size:
                    # An empty run contributes nothing. Processing it while
                    # total_size is still zero would divide by zero below,
                    # and its avg may even be None.
                    continue
                run_avg = run.avg
                run_stdev = run.stdev
            old_total_size = total_size
            delta = run_avg - total_avg
            total_size += run_size
            total_avg += delta * run_size / total_size
            # Variance inside the run, plus variance between the run average
            # and the average accumulated so far.
            moment_2 += run_stdev * run_stdev * run_size
            moment_2 += delta * delta * old_total_size * run_size / total_size
        if total_size < 1:
            # Avoid division by zero.
            return cls(size=0)
        # TODO: Is it worth tracking moment_2 instead, and compute and cache
        # stdev on demand, just to possibly save some sqrt calls?
        total_stdev = math.sqrt(moment_2 / total_size)
        return cls(size=total_size, avg=total_avg, stdev=total_stdev)
diff --git a/resources/libraries/python/jumpavg/BitCountingGroup.py b/resources/libraries/python/jumpavg/BitCountingGroup.py
new file mode 100644
index 0000000000..0c1aabba30
--- /dev/null
+++ b/resources/libraries/python/jumpavg/BitCountingGroup.py
@@ -0,0 +1,173 @@
+# Copyright (c) 2019 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Module holding BitCountingGroup class."""
+
+import copy
+
+from .AvgStdevStats import AvgStdevStats
+from .BitCountingStats import BitCountingStats
+
+
class BitCountingGroup:
    """Group of runs which tracks bit count in an efficient manner.

    This class contains methods that mutate the internal state,
    use copy() method to save the previous state.

    The Sequence-like access is related to the list of runs,
    for example group[0] returns the first run in the list.
    Writable list-like methods are not implemented.

    As the group bit count depends on previous average
    and overall maximal value, those values are assumed
    to be known beforehand (and immutable).

    As the caller is allowed to divide runs into groups in any way,
    a method to add a single run in an efficient manner is provided.
    """
    # TODO: Inherit from collections.abc.Sequence in Python 3.

    def __init__(self, run_list=None, stats=None, bits=None,
                 max_value=None, prev_avg=None, comment="unknown"):
        """Set the internal state and partially the stats.

        A "group" stands for an Iterable of runs, where "run" is either
        a float value, or a stats-like object (only size, avg and stdev
        are accessed). Run is a hypothetical abstract class.

        Only a deep copy of the run list argument value is stored
        in the instance (always as a list), so it is not a problem
        if the value object is mutated afterwards.

        It is not verified whether the user provided values are valid,
        e.g. whether the stats and bits values reflect the runs.

        :param run_list: Runs to compose into this group. Default: empty.
        :param stats: Stats object used for computing bits.
            If None, stats are computed from the runs.
        :param bits: Cached value of information content.
        :param max_value: Maximal sample value to be used for computing.
        :param prev_avg: Average of the previous group, affects bits.
        :param comment: Any string giving more info, e.g. "regression".
        :type run_list: Optional[Iterable[Run]]
        :type stats: Optional[AvgStdevStats]
        :type bits: Optional[float]
        :type max_value: float
        :type prev_avg: Optional[float]
        :type comment: str
        """
        # Materialize as a list first, so tuples and one-shot iterators
        # are accepted and append()/extend() always work afterwards.
        if run_list is None:
            self.run_list = []
        else:
            self.run_list = copy.deepcopy(list(run_list))
        self.stats = stats
        self.cached_bits = bits
        self.max_value = max_value
        self.prev_avg = prev_avg
        self.comment = comment
        if self.stats is None:
            self.stats = AvgStdevStats.for_runs(self.run_list)

    def __str__(self):
        """Return string with human readable description of the group.

        :returns: Readable description.
        :rtype: str
        """
        return f"stats={self.stats} bits={self.cached_bits}"

    def __repr__(self):
        """Return string executable as Python constructor call.

        :returns: Executable constructor call.
        :rtype: str
        """
        return (
            f"BitCountingGroup(run_list={self.run_list!r},stats={self.stats!r}"
            f",bits={self.cached_bits!r},max_value={self.max_value!r}"
            f",prev_avg={self.prev_avg!r},comment={self.comment!r})"
        )

    def __getitem__(self, index):
        """Return the run at the index.

        :param index: Index of the run to return.
        :type index: int
        :returns: The run at the index.
        :rtype: Run
        """
        return self.run_list[index]

    def __len__(self):
        """Return the number of runs in the group.

        :returns: The length of run_list.
        :rtype: int
        """
        return len(self.run_list)

    def copy(self):
        """Return a new instance with copied internal state.

        The stats are copied field by field, so the copy holds exactly
        the same numbers and copying an empty group does no arithmetic.

        :returns: The copied instance.
        :rtype: BitCountingGroup
        """
        stats = AvgStdevStats(
            size=self.stats.size, avg=self.stats.avg, stdev=self.stats.stdev)
        return self.__class__(
            run_list=self.run_list, stats=stats, bits=self.cached_bits,
            max_value=self.max_value, prev_avg=self.prev_avg,
            comment=self.comment)

    @property
    def bits(self):
        """Return overall bit content of the group.

        If not cached, compute from stats and cache.

        :returns: The overall information content in bits.
        :rtype: float
        """
        if self.cached_bits is None:
            self.cached_bits = BitCountingStats.for_runs(
                [self.stats], self.max_value, self.prev_avg).bits
        return self.cached_bits

    def append(self, run):
        """Mutate to add the new run, return self.

        Stats are updated, but old bits value is deleted from cache.

        :param run: The run value to add to the group.
        :type run: Run
        :returns: The updated self.
        :rtype: BitCountingGroup
        """
        self.run_list.append(run)
        self.stats = AvgStdevStats.for_runs([self.stats, run])
        self.cached_bits = None
        return self

    def extend(self, runs):
        """Mutate to add the new runs, return self.

        This saves a small amount of computation
        compared to adding runs one by one in a loop.

        Stats are updated, but old bits value is deleted from cache.

        :param runs: The runs to add to the group.
        :type runs: Iterable[Run]
        :returns: The updated self.
        :rtype: BitCountingGroup
        """
        # The runs are needed twice (storing and stats), and list
        # concatenation below requires a real list.
        runs = list(runs)
        self.run_list.extend(runs)
        self.stats = AvgStdevStats.for_runs([self.stats] + runs)
        self.cached_bits = None
        return self
diff --git a/resources/libraries/python/jumpavg/BitCountingGroupList.py b/resources/libraries/python/jumpavg/BitCountingGroupList.py
new file mode 100644
index 0000000000..bcc5e43267
--- /dev/null
+++ b/resources/libraries/python/jumpavg/BitCountingGroupList.py
@@ -0,0 +1,185 @@
+# Copyright (c) 2019 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Module holding BitCountingGroupList class."""
+
+import copy
+
+from .BitCountingGroup import BitCountingGroup
+
+
class BitCountingGroupList:
    """List of data groups which tracks overall bit count.

    The Sequence-like access is related to the list of groups,
    for example group_list[0] returns the first group in the list.
    Writable list-like methods are not implemented.

    The overall bit count is the sum of bit counts of each group.
    Group is a sequence of data samples accompanied by their stats.
    Different partitioning of data samples into the groups
    results in different overall bit count.
    This can be used to group samples in various contexts.

    As the group bit count depends on previous average
    and overall maximal value, order of groups is important.
    Having the logic encapsulated here spares the caller
    the effort to pass averages around.

    The data can be only added, and there is some logic to skip
    recalculations if the bit count is not needed.
    """
    # TODO: Inherit from collections.abc.Sequence in Python 3.

    def __init__(self, group_list=None, bits_except_last=0.0, max_value=None):
        """Set the internal state without any calculations.

        The group list argument is copied deeply (always into a list),
        so it is not a problem if the value object is mutated afterwards.

        A "group" stands for an Iterable of runs, where "run" is either
        a float value, or a stats-like object (only size, avg and stdev
        are accessed). Run is a hypothetical abstract class.

        It is not verified whether the user provided values are valid,
        e.g. whether the cached bits values make sense.

        The max_value is required and immutable,
        it is recommended the callers find their maximum beforehand.

        :param group_list: Groups to compose this group list. Default: empty.
        :param bits_except_last: Partial sum of all but one group bits.
        :param max_value: Maximal sample value to base bits computation on.
        :type group_list: Optional[Iterable[BitCountingGroup]]
        :type bits_except_last: float
        :type max_value: float
        """
        # Materialize as a list first, so tuples and one-shot iterators
        # are accepted and appending groups always works afterwards.
        if group_list is None:
            self.group_list = []
        else:
            self.group_list = copy.deepcopy(list(group_list))
        self.bits_except_last = bits_except_last
        self.max_value = max_value

    def __str__(self):
        """Return string with human readable description of the group list.

        Reading the bits property may compute and cache
        the bit count of the last group.

        :returns: Readable description.
        :rtype: str
        """
        return f"group_list={self.group_list} bits={self.bits}"

    def __repr__(self):
        """Return string executable as Python constructor call.

        :returns: Executable constructor call.
        :rtype: str
        """
        return (
            f"BitCountingGroupList(group_list={self.group_list!r}"
            f",bits_except_last={self.bits_except_last!r}"
            f",max_value={self.max_value!r})"
        )

    def __getitem__(self, index):
        """Return the group at the index.

        :param index: Index of the group to return.
        :type index: int
        :returns: The group at the index.
        :rtype: BitCountingGroup
        """
        return self.group_list[index]

    def __len__(self):
        """Return the length of the group list.

        :returns: The length of group_list.
        :rtype: int
        """
        return len(self.group_list)

    def copy(self):
        """Return a new instance with copied internal state.

        The constructor performs the deep copy of the groups.

        :returns: The copied instance.
        :rtype: BitCountingGroupList
        """
        return self.__class__(
            group_list=self.group_list, bits_except_last=self.bits_except_last,
            max_value=self.max_value
        )

    @property
    def bits(self):
        """Return overall bit content of the group list.

        :returns: The overall information content in bits.
        :rtype: float
        """
        if not self.group_list:
            return 0.0
        # TODO: Is it worth to cache the overall result?
        return self.bits_except_last + self.group_list[-1].bits

    def append_group_of_runs(self, runs):
        """Mutate to add a new group based on the runs, return self.

        The argument is copied before adding to the group list,
        so further edits do not affect the group list.
        The argument can also be a group, only runs from it are used.

        :param runs: Runs to form the next group to be appended to self.
        :type runs: Union[Iterable[Run], BitCountingGroup]
        :returns: The updated self.
        :rtype: BitCountingGroupList
        """
        prev_avg = self.group_list[-1].stats.avg if self.group_list else None
        if isinstance(runs, BitCountingGroup):
            # It is faster to avoid stats recalculation.
            new_group = runs.copy()
            # The copied group may come from a different context,
            # so the values affecting bits are overwritten.
            new_group.max_value = self.max_value
            new_group.prev_avg = prev_avg
            new_group.cached_bits = None
        else:
            new_group = BitCountingGroup(
                run_list=runs, max_value=self.max_value, prev_avg=prev_avg)
        # The current last group stops being last, so its bits
        # have to be included in the partial sum before appending.
        self.bits_except_last = self.bits
        self.group_list.append(new_group)
        return self

    def append_run_to_to_last_group(self, run):
        """Mutate to add new run at the end of the last group.

        Basically a one-liner, only returning group list instead of last group.

        :param run: The run value to add to the last group.
        :type run: Run
        :returns: The updated self.
        :rtype: BitCountingGroupList
        :raises IndexError: If group list is empty, no last group to add to.
        """
        self.group_list[-1].append(run)
        return self

    def extend_runs_to_last_group(self, runs):
        """Mutate to add new runs to the end of the last group.

        A faster alternative to appending runs one by one in a loop.

        :param runs: The runs to add to the last group.
        :type runs: Iterable[Run]
        :returns: The updated self.
        :rtype: BitCountingGroupList
        :raises IndexError: If group list is empty, no last group to add to.
        """
        self.group_list[-1].extend(runs)
        return self
diff --git a/resources/libraries/python/jumpavg/BitCountingStats.py b/resources/libraries/python/jumpavg/BitCountingStats.py
new file mode 100644
index 0000000000..0addec013b
--- /dev/null
+++ b/resources/libraries/python/jumpavg/BitCountingStats.py
@@ -0,0 +1,169 @@
+# Copyright (c) 2019 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Module holding BitCountingStats class."""
+
+import math
+
+from .AvgStdevStats import AvgStdevStats
+
+
class BitCountingStats(AvgStdevStats):
    """Statistics of a group, extended with its information content.

    The information content is based on an assumption that the data
    consists of independent random values from a normal distribution.

    Instances are only statistics, the data itself is stored elsewhere.

    The coding needs to know the previous average, and a maximal value,
    so both values are required as inputs.

    This is a subclass of AvgStdevStats, even though all methods
    are overridden. Only for_runs method calls the parent implementation,
    without using super().
    """

    def __init__(
            self, size=0, avg=None, stdev=0.0, max_value=None, prev_avg=None):
        """Store the arguments and compute the bit count from them.

        The values are not sanitized, faulty callers can cause math errors.

        None is tolerated for avg and max_value only when size is below one;
        in that case bits stay zero and no further checks are done.
        For larger sizes, ValueError is raised if avg is None,
        or if max_value is None or not positive.
        Relations of max_value to avg and prev_avg are not examined.

        The bit count is not real, as that would depend on numeric precision
        (number of significant bits in values).
        The difference is assumed to be constant per value,
        which is consistent with Gauss distribution
        (but not with floating point mechanic).
        The hope is the difference will have
        no real impact on the classification procedure.

        :param size: Number of values participating in this group.
        :param avg: Population average of the participating sample values.
        :param stdev: Population standard deviation of the sample values.
        :param max_value: Maximal expected value.
            TODO: This might be more optimal,
            but max-invariant algorithm will be nicer.
        :param prev_avg: Population average of the previous group.
            If None, no previous average is taken into account.
            If not None, the given previous average is used to discourage
            consecutive groups with similar averages
            (opposite triangle distribution is assumed).
        :type size: int
        :type avg: Optional[float]
        :type stdev: float
        :type max_value: Optional[float]
        :type prev_avg: Optional[float]
        :raises ValueError: If size is positive and avg or max_value
            is unusable.
        """
        self.avg = avg
        self.size = size
        self.stdev = stdev
        self.max_value = max_value
        self.prev_avg = prev_avg
        # Zero size should in principle have non-zero bits (coding zero size),
        # but zero allows users to add empty groups without affecting bits.
        self.bits = 0.0
        if size < 1:
            return
        if avg is None:
            raise ValueError(f"Avg is None: {self!r}")
        if max_value is None or max_value <= 0.0:
            raise ValueError(f"Invalid max value: {self!r}")
        # Length of the sequence must be also counted in bits,
        # otherwise the message would not be decodable.
        # Model: probability of k samples is 1/k - 1/(k+1) == 1/k/(k+1)
        # This is compatible with zero size leading to zero bits.
        length_bits = math.log(size * (size + 1), 2)
        self.bits += length_bits
        if prev_avg is not None:
            # Opposite triangle distribution with minimum.
            jump = abs(avg - prev_avg) + 1
            avg_bits = math.log(max_value * (max_value + 1) / jump, 2)
        else:
            # Avg is considered to be uniformly distributed
            # from zero to max_value.
            avg_bits = math.log(max_value + 1.0, 2)
        self.bits += avg_bits
        if size < 2:
            # A single sample has no spread to encode.
            return
        # Stdev is considered to be uniformly distributed
        # from zero to max_value. That is quite a bad expectation,
        # but resilient to negative samples etc.
        self.bits += math.log(max_value + 1.0, 2)
        # Now we know the samples lie on sphere in size-1 dimensions.
        # So it is (size-2)-sphere, with radius^2 == stdev^2 * size.
        # https://en.wikipedia.org/wiki/N-sphere
        ln_two = math.log(2)
        half_dim = (size - 1) / 2.0
        surface_ln = (
            ln_two
            + math.log(math.pi) * half_dim
            - math.lgamma(half_dim)
            + math.log(stdev + 1.0) * (size - 2)
            + math.log(size) * ((size - 2) / 2.0)
        )
        # Convert natural logarithm to bits.
        self.bits += surface_ln / ln_two

    def __str__(self):
        """Return string with human readable description of the group.

        :returns: Readable description.
        :rtype: str
        """
        base_part = f"size={self.size} avg={self.avg} stdev={self.stdev}"
        bits_part = f" bits={self.bits}"
        return base_part + bits_part

    def __repr__(self):
        """Return string executable as Python constructor call.

        :returns: Executable constructor call.
        :rtype: str
        """
        fields = (
            f"size={self.size!r}",
            f"avg={self.avg!r}",
            f"stdev={self.stdev!r}",
            f"max_value={self.max_value!r}",
            f"prev_avg={self.prev_avg!r}",
        )
        return "BitCountingStats(" + ",".join(fields) + ")"

    @classmethod
    def for_runs(cls, runs, max_value=None, prev_avg=None):
        """Return new stats instance describing the sequence of runs.

        If you want to append data to existing stats object,
        you can simply use the stats object as the first run.

        Instead of a verb, "for" is used to start this method name,
        to signify the result contains less information than the input data.

        Size, avg and stdev are computed by AvgStdevStats.for_runs,
        the two optional values can come from outside of the runs provided.

        The max_value cannot be None for non-zero size data.
        The implementation does not check if no datapoint exceeds max_value.

        TODO: Document the behavior for zero size result.

        :param runs: Sequence of data to describe by the new metadata.
        :param max_value: Maximal expected value.
        :param prev_avg: Population average of the previous group, if any.
        :type runs: Iterable[Union[float, AvgStdevStats]]
        :type max_value: Optional[float]
        :type prev_avg: Optional[float]
        :returns: The new stats instance.
        :rtype: cls
        """
        plain = AvgStdevStats.for_runs(runs)
        return cls(
            size=plain.size,
            avg=plain.avg,
            stdev=plain.stdev,
            max_value=max_value,
            prev_avg=prev_avg,
        )
diff --git a/resources/libraries/python/jumpavg/__init__.py b/resources/libraries/python/jumpavg/__init__.py
new file mode 100644
index 0000000000..cb8b3df43d
--- /dev/null
+++ b/resources/libraries/python/jumpavg/__init__.py
@@ -0,0 +1,22 @@
+# Copyright (c) 2019 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+__init__ file for "jumpavg" Python package.
+"""
+
+from .AvgStdevStats import AvgStdevStats
+from .BitCountingStats import BitCountingStats
+from .BitCountingGroup import BitCountingGroup
+from .BitCountingGroupList import BitCountingGroupList
+from .classify import classify
diff --git a/resources/libraries/python/jumpavg/classify.py b/resources/libraries/python/jumpavg/classify.py
new file mode 100644
index 0000000000..5f5ce6160c
--- /dev/null
+++ b/resources/libraries/python/jumpavg/classify.py
@@ -0,0 +1,76 @@
+# Copyright (c) 2019 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Module holding the classify function
+
+Classification is one of the primary purposes of this package.
+
+Minimal message length principle is used
+for grouping results into the list of groups,
+assuming each group is a population of different Gaussian distribution.
+"""
+
+from .AvgStdevStats import AvgStdevStats
+from .BitCountingGroupList import BitCountingGroupList
+
+
def classify(values):
    """Return the values in groups of optimal bit count.

    Here, a value is either a float, or an iterable of floats.
    Such iterables represent an indivisible sequence of floats.

    Internally, such sequence is replaced by AvgStdevStats
    after maximal value is found.

    Each group in the result gets a comment: "normal" if its average
    equals the average of the previous group (always the case
    for the first group), "regression" if it is lower,
    "progression" if it is higher.

    For empty input, an empty group list is returned.

    :param values: Sequence of runs to classify.
    :type values: Iterable[Union[float, Iterable[float]]]
    :returns: Classified group list.
    :rtype: BitCountingGroupList
    """
    processed_values = []
    max_value = 0.0
    for value in values:
        if isinstance(value, (float, int)):
            if value > max_value:
                max_value = value
            processed_values.append(value)
        else:
            # The subvalues are traversed twice (maximum and stats),
            # so one-shot iterators have to be materialized first.
            subvalues = list(value)
            for subvalue in subvalues:
                if subvalue > max_value:
                    max_value = subvalue
            processed_values.append(AvgStdevStats.for_runs(subvalues))
    # open_at[i] is the best group list whose last group starts at run i
    # and is extended by every run processed since.
    open_at = []
    # closed_before[i] is the best group list covering exactly
    # the first i runs.
    closed_before = [BitCountingGroupList(max_value=max_value)]
    for index, value in enumerate(processed_values):
        # Candidate: close everything before this run, start a new group.
        newly_open = closed_before[index].copy()
        newly_open.append_group_of_runs([value])
        open_at.append(newly_open)
        record_group_list = newly_open
        # Candidates: prolong the last group of each older open list.
        for previous_index, old_open in enumerate(open_at[:index]):
            new_open = old_open.copy().append_run_to_to_last_group(value)
            open_at[previous_index] = new_open
            if new_open.bits < record_group_list.bits:
                record_group_list = new_open
        closed_before.append(record_group_list)
    partition = closed_before[-1]
    if not partition:
        # No runs, no groups to comment on.
        return partition
    previous_average = partition[0].stats.avg
    for group in partition:
        if group.stats.avg == previous_average:
            group.comment = "normal"
        elif group.stats.avg < previous_average:
            group.comment = "regression"
        elif group.stats.avg > previous_average:
            group.comment = "progression"
        previous_average = group.stats.avg
    return partition