diff options
author | Vratko Polak <vrpolak@cisco.com> | 2019-12-02 18:38:44 +0100 |
---|---|---|
committer | Vratko Polak <vrpolak@cisco.com> | 2019-12-02 18:57:32 +0100 |
commit | 627cddca1d64edb8475407a1524efb2a22249a25 (patch) | |
tree | 919d0140958f94dda23d7338195fc7658940e5c3 /resources/libraries | |
parent | 9a936616eea7236ca8183f632d01d62109e12b29 (diff) |
Refactor jumpavg to be more readable and usable
This is the python3 change, the python2 is still used for testing bisect.
+ New version is 0.2.0 due to amount o API changes.
+ Jumpavg is now part of CSIT resource package tree.
+ Perpatch migrated to new jumpavg.
- PAL NOT updated (the update moved to a different Change).
Change-Id: I7d7a8bf8a411196c20c2a40a8c64478d6709bc07
Signed-off-by: Vratko Polak <vrpolak@cisco.com>
Diffstat (limited to 'resources/libraries')
-rw-r--r-- | resources/libraries/bash/function/per_patch.sh | 7 | ||||
-rw-r--r-- | resources/libraries/python/jumpavg/AvgStdevStats.py | 113 | ||||
-rw-r--r-- | resources/libraries/python/jumpavg/BitCountingGroup.py | 173 | ||||
-rw-r--r-- | resources/libraries/python/jumpavg/BitCountingGroupList.py | 185 | ||||
-rw-r--r-- | resources/libraries/python/jumpavg/BitCountingStats.py | 169 | ||||
-rw-r--r-- | resources/libraries/python/jumpavg/__init__.py | 22 | ||||
-rw-r--r-- | resources/libraries/python/jumpavg/classify.py | 76 |
7 files changed, 742 insertions, 3 deletions
diff --git a/resources/libraries/bash/function/per_patch.sh b/resources/libraries/bash/function/per_patch.sh index 919789b5f4..ea7ea4f837 100644 --- a/resources/libraries/bash/function/per_patch.sh +++ b/resources/libraries/bash/function/per_patch.sh @@ -108,11 +108,12 @@ function compare_test_results () { set -exuo pipefail cd "${VPP_DIR}" || die "Change directory operation failed." - # Reusing CSIT main virtualenv. + # Ply is installed as system level package, but not seen for some reason. pip3 install -r "${PYTHON_SCRIPTS_DIR}/perpatch_requirements.txt" || { - die "Perpatch Python requirements installation failed." + die "Compare script Python requirements installation failed." } - python3 "${PYTHON_SCRIPTS_DIR}/compare_perpatch.py" + # Reusing CSIT main virtualenv. + python3 "${TOOLS_DIR}/integrated/compare_perpatch.py" # The exit code determines the vote result. } diff --git a/resources/libraries/python/jumpavg/AvgStdevStats.py b/resources/libraries/python/jumpavg/AvgStdevStats.py new file mode 100644 index 0000000000..9a8decd932 --- /dev/null +++ b/resources/libraries/python/jumpavg/AvgStdevStats.py @@ -0,0 +1,113 @@ +# Copyright (c) 2019 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Module holding AvgStdevStats class.""" + +import math + + +class AvgStdevStats: + """Class for statistics which include average and stdev of a group. + + Contrary to other stats types, adding values to the group + is computationally light without any caching. + + Instances are only statistics, the data itself is stored elsewhere. + """ + + def __init__(self, size=0, avg=0.0, stdev=0.0): + """Construct the stats object by storing the values needed. + + Each value has to be numeric. + The values are not sanitized depending on size, wrong initialization + can cause delayed math errors. + + :param size: Number of values participating in this group. + :param avg: Population average of the participating sample values. + :param stdev: Population standard deviation of the sample values. + :type size: int + :type avg: float + :type stdev: float + """ + self.size = size + self.avg = avg + self.stdev = stdev + + def __str__(self): + """Return string with human readable description of the group. + + :returns: Readable description. + :rtype: str + """ + return f"size={self.size} avg={self.avg} stdev={self.stdev}" + + def __repr__(self): + """Return string executable as Python constructor call. + + :returns: Executable constructor call. + :rtype: str + """ + return ( + f"AvgStdevStats(size={self.size!r},avg={self.avg!r}" + f",stdev={self.stdev!r})" + ) + + @classmethod + def for_runs(cls, runs): + """Return new stats instance describing the sequence of runs. + + If you want to append data to existing stats object, + you can simply use the stats object as the first run. + + Instead of a verb, "for" is used to start this method name, + to signify the result contains less information than the input data. + + Here, Run is a hypothetical abstract class, an union of float and cls. + Defining that as a real abstract class in Python 2 is too much hassle. + + :param runs: Sequence of data to describe by the new metadata. + :type runs: Iterable[Union[float, cls]] + :returns: The new stats instance. + :rtype: cls + """ + # Using Welford method to be more resistant to rounding errors. + # Adapted from code for sample standard deviation at: + # https://www.johndcook.com/blog/standard_deviation/ + # The logic of plus operator is taken from + # https://www.johndcook.com/blog/skewness_kurtosis/ + total_size = 0 + total_avg = 0.0 + moment_2 = 0.0 + for run in runs: + if isinstance(run, (float, int)): + run_size = 1 + run_avg = run + run_stdev = 0.0 + else: + run_size = run.size + run_avg = run.avg + run_stdev = run.stdev + old_total_size = total_size + delta = run_avg - total_avg + total_size += run_size + total_avg += delta * run_size / total_size + moment_2 += run_stdev * run_stdev * run_size + moment_2 += delta * delta * old_total_size * run_size / total_size + if total_size < 1: + # Avoid division by zero. + return cls(size=0) + # TODO: Is it worth tracking moment_2 instead, and compute and cache + # stdev on demand, just to possibly save some sqrt calls? + total_stdev = math.sqrt(moment_2 / total_size) + ret_obj = cls(size=total_size, avg=total_avg, stdev=total_stdev) + return ret_obj diff --git a/resources/libraries/python/jumpavg/BitCountingGroup.py b/resources/libraries/python/jumpavg/BitCountingGroup.py new file mode 100644 index 0000000000..0c1aabba30 --- /dev/null +++ b/resources/libraries/python/jumpavg/BitCountingGroup.py @@ -0,0 +1,173 @@ +# Copyright (c) 2019 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Module holding BitCountingGroup class.""" + +import copy + +from .AvgStdevStats import AvgStdevStats +from .BitCountingStats import BitCountingStats + + +class BitCountingGroup: + # TODO: Inherit from collections.abc.Sequence in Python 3. + """Group of runs which tracks bit count in an efficient manner. + + This class contains methods that mutate the internal state, + use copy() method to save the previous state. + + The Sequence-like access is related to the list of runs, + for example group[0] returns the first run in the list. + Writable list-like methods are not implemented. + + As the group bit count depends on previous average + and overall maximal value, those values are assumed + to be known beforehand (and immutable). + + As the caller is allowed to divide runs into groups in any way, + a method to add a single run in an efficient manner is provided. + """ + + def __init__(self, run_list=None, stats=None, bits=None, + max_value=None, prev_avg=None, comment="unknown"): + """Set the internal state and partially the stats. + + A "group" stands for an Iterable of runs, where "run" is either + a float value, or a stats-like object (only size, avg and stdev + are accessed). Run is a hypothetical abstract class, + defining it in Python 2 is too much hassle. + + Only a copy of the run list argument value is stored in the instance, + so it is not a problem if the value object is mutated afterwards. + + It is not verified whether the user provided values are valid, + e.g. whether the stats and bits values reflect the runs. + + :param run_list: List of run to compose into this group. Default: empty. + :param stats: Stats object used for computing bits. + :param bits: Cached value of information content. + :param max_value: Maximal sample value to be used for computing. + :param prev_avg: Average of the previous group, affects bits. + :param comment: Any string giving more info, e.g. "regression". + :type run_list: Iterable[Run] + :type stats: Optional[AvgStdevStats] + :type bits: Optional[float] + :type max_value: float + :type prev_avg: Optional[float] + :type comment: str + """ + self.run_list = copy.deepcopy(run_list) if run_list else list() + self.stats = stats + self.cached_bits = bits + self.max_value = max_value + self.prev_avg = prev_avg + self.comment = comment + if self.stats is None: + self.stats = AvgStdevStats.for_runs(self.run_list) + + def __str__(self): + """Return string with human readable description of the group. + + :returns: Readable description. + :rtype: str + """ + return f"stats={self.stats} bits={self.cached_bits}" + + def __repr__(self): + """Return string executable as Python constructor call. + + :returns: Executable constructor call. + :rtype: str + """ + return ( + f"BitCountingGroup(run_list={self.run_list!r},stats={self.stats!r}" + f",bits={self.cached_bits!r},max_value={self.max_value!r}" + f",prev_avg={self.prev_avg!r},comment={self.comment!r})" + ) + + def __getitem__(self, index): + """Return the run at the index. + + :param index: Index of the run to return. + :type index: int + :returns: The run at the index. + :rtype: Run + """ + return self.run_list[index] + + def __len__(self): + """Return the number of runs in the group. + + :returns: The Length of run_list. + :rtype: int + """ + return len(self.run_list) + + def copy(self): + """Return a new instance with copied internal state. + + :returns: The copied instance. + :rtype: BitCountingGroup + """ + stats = AvgStdevStats.for_runs([self.stats]) + return self.__class__( + run_list=self.run_list, stats=stats, bits=self.cached_bits, + max_value=self.max_value, prev_avg=self.prev_avg, + comment=self.comment) + + @property + def bits(self): + """Return overall bit content of the group list. + + If not cached, compute from stats and cache. + + :returns: The overall information content in bits. + :rtype: float + """ + if self.cached_bits is None: + self.cached_bits = BitCountingStats.for_runs( + [self.stats], self.max_value, self.prev_avg).bits + return self.cached_bits + + def append(self, run): + """Mutate to add the new run, return self. + + Stats are updated, but old bits value is deleted from cache. + + :param run: The run value to add to the group. + :type value: Run + :returns: The updated self. + :rtype: BitCountingGroup + """ + self.run_list.append(run) + self.stats = AvgStdevStats.for_runs([self.stats, run]) + self.cached_bits = None + return self + + def extend(self, runs): + """Mutate to add the new runs, return self. + + This is saves small amount of computation + compared to adding runs one by one in a loop. + + Stats are updated, but old bits value is deleted from cache. + + :param runs: The runs to add to the group. + :type value: Iterable[Run] + :returns: The updated self. + :rtype: BitCountingGroup + """ + self.run_list.extend(runs) + self.stats = AvgStdevStats.for_runs([self.stats] + runs) + self.cached_bits = None + return self diff --git a/resources/libraries/python/jumpavg/BitCountingGroupList.py b/resources/libraries/python/jumpavg/BitCountingGroupList.py new file mode 100644 index 0000000000..bcc5e43267 --- /dev/null +++ b/resources/libraries/python/jumpavg/BitCountingGroupList.py @@ -0,0 +1,185 @@ +# Copyright (c) 2019 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Module holding BitCountingGroupList class.""" + +import copy + +from .BitCountingGroup import BitCountingGroup + + +class BitCountingGroupList: + # TODO: Inherit from collections.abc.Sequence in Python 3. + """List of data groups which tracks overall bit count. + + The Sequence-like access is related to the list of groups, + for example group_list[0] returns the first group in the list. + Writable list-like methods are not implemented. + + The overall bit count is the sum of bit counts of each group. + Group is a sequence of data samples accompanied by their stats. + Different partitioning of data samples into the groups + results in different overall bit count. + This can be used to group samples in various contexts. + + As the group bit count depends on previous average + and overall maximal value, order of groups is important. + Having the logic encapsulated here spares the caller + the effort to pass averages around. + + The data can be only added, and there is some logic to skip + recalculations if the bit count is not needed. + """ + + def __init__(self, group_list=None, bits_except_last=0.0, max_value=None): + """Set the internal state without any calculations. + + The group list argument is copied deeply, so it is not a problem + if the value object is mutated afterwards. + + A "group" stands for an Iterable of runs, where "run" is either + a float value, or a stats-like object (only size, avg and stdev + are accessed). Run is a hypothetical abstract class, + defining it in Python 2 is too much hassle. + + It is not verified whether the user provided values are valid, + e.g. whether the cached bits values make sense. + + The max_value is required and immutable, + it is recommended the callers find their maximum beforehand. + + :param group_list: List of groups to compose this group list (or empty). + :param bits_except_last: Partial sum of all but one group bits. + :param max_value: Maximal sample value to base bits computation on. + :type group_list: Iterable[BitCountingGroup] + :type bits_except_last: float + :type max_value: float + """ + self.group_list = copy.deepcopy(group_list) if group_list else list() + self.bits_except_last = bits_except_last + self.max_value = max_value + + def __str__(self): + """Return string with human readable description of the group list. + + :returns: Readable description. + :rtype: str + """ + return u"group_list={self.group_list} bits={self.bits}" + + def __repr__(self): + """Return string executable as Python constructor call. + + :returns: Executable constructor call. + :rtype: str + """ + return ( + f"BitCountingGroupList(group_list={self.group_list!r}" + f",bits_except_last={self.bits_except_last!r}" + f",max_value={self.max_value!r})" + ) + + def __getitem__(self, index): + """Return the group at the index. + + :param index: Index of the group to return. + :type index: int + :returns: The group at the index. + :rtype: BitCountingGroup + """ + return self.group_list[index] + + def __len__(self): + """Return the length of the group list. + + :returns: The Length of group_list. + :rtype: int + """ + return len(self.group_list) + + def copy(self): + """Return a new instance with copied internal state. + + :returns: The copied instance. + :rtype: BitCountingGroupList + """ + return self.__class__( + group_list=self.group_list, bits_except_last=self.bits_except_last, + max_value=self.max_value + ) + + @property + def bits(self): + """Return overall bit content of the group list. + + :returns: The overall information content in bits. + :rtype: float + """ + if not self.group_list: + return 0.0 + # TODO: Is it worth to cache the overall result? + return self.bits_except_last + self.group_list[-1].bits + + def append_group_of_runs(self, runs): + """Mutate to add a new group based on the runs, return self. + + The argument is copied before adding to the group list, + so further edits do not affect the grup list. + The argument can also be a group, only runs from it are used. + + :param runs: Runs to form the next group to be appended to self. + :type runs: Union[Iterable[Run], BitCountingGroup] + :returns: The updated self. + :rtype: BitCountingGroupList + """ + prev_avg = self.group_list[-1].stats.avg if self.group_list else None + if isinstance(runs, BitCountingGroup): + # It is faster to avoid stats recalculation. + new_group = runs.copy() + new_group.max_value = self.max_value + new_group.prev_avg = prev_avg + new_group.cached_bits = None + else: + new_group = BitCountingGroup( + run_list=runs, max_value=self.max_value, prev_avg=prev_avg) + self.bits_except_last = self.bits + self.group_list.append(new_group) + return self + + def append_run_to_to_last_group(self, run): + """Mutate to add new run at the end of the last group. + + Basically a one-liner, only returning group list instead of last group. + + :param run: The run value to add to the last group. + :type run: Run + :returns: The updated self. + :rtype: BitCountingGroupList + :raises IndexError: If group list is empty, no last group to add to. + """ + self.group_list[-1].append(run) + return self + + def extend_runs_to_last_group(self, runs): + """Mutate to add new runs to the end of the last group. + + A faster alternative to appending runs one by one in a loop. + + :param runs: The runs to add to the last group. + :type runs: Iterable[Run] + :returns: The updated self + :rtype: BitCountingGroupList + :raises IndexError: If group list is empty, no last group to add to. + """ + self.group_list[-1].extend(runs) + return self diff --git a/resources/libraries/python/jumpavg/BitCountingStats.py b/resources/libraries/python/jumpavg/BitCountingStats.py new file mode 100644 index 0000000000..0addec013b --- /dev/null +++ b/resources/libraries/python/jumpavg/BitCountingStats.py @@ -0,0 +1,169 @@ +# Copyright (c) 2019 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Module holding BitCountingStats class.""" + +import math + +from .AvgStdevStats import AvgStdevStats + + +class BitCountingStats(AvgStdevStats): + """Class for statistics which include information content of a group. + + The information content is based on an assumption that the data + consists of independent random values from a normal distribution. + + Instances are only statistics, the data itself is stored elsewhere. + + The coding needs to know the previous average, and a maximal value + so both values are required as inputs. + + This is a subclass of AvgStdevStats, even though all methods are overriden. + Only for_runs method calls the parent implementation, without using super(). + """ + + def __init__( + self, size=0, avg=None, stdev=0.0, max_value=None, prev_avg=None): + """Construct the stats object by computing from the values needed. + + The values are not sanitized, faulty callers can cause math errors. + + The None values are allowed for stats for zero size data, + but such stats can report arbitrary avg and max_value. + Stats for nonzero size data cannot contain None, + else ValueError is raised. + + The max_value needs to be numeric for nonzero size, + but its relations to avg and prev_avg are not examined. + + The bit count is not real, as that would depend on numeric precision + (number of significant bits in values). + The difference is assumed to be constant per value, + which is consistent with Gauss distribution + (but not with floating point mechanic). + The hope is the difference will have + no real impact on the classification procedure. + + :param size: Number of values participating in this group. + :param avg: Population average of the participating sample values. + :param stdev: Population standard deviation of the sample values. + :param max_value: Maximal expected value. + TODO: This might be more optimal, + but max-invariant algorithm will be nicer. + :param prev_avg: Population average of the previous group. + If None, no previous average is taken into account. + If not None, the given previous average is used to discourage + consecutive groups with similar averages + (opposite triangle distribution is assumed). + :type avg: float + :type size: int + :type stdev: float + :type max_value: Union[float, NoneType] + :type prev_avg: Union[float, NoneType] + """ + self.avg = avg + self.size = size + self.stdev = stdev + self.max_value = max_value + self.prev_avg = prev_avg + # Zero size should in principle have non-zero bits (coding zero size), + # but zero allows users to add empty groups without affecting bits. + self.bits = 0.0 + if self.size < 1: + return + if avg is None: + raise ValueError(f"Avg is None: {self!r}") + if max_value is None or max_value <= 0.0: + raise ValueError(f"Invalid max value: {self!r}") + # Length of the sequence must be also counted in bits, + # otherwise the message would not be decodable. + # Model: probability of k samples is 1/k - 1/(k+1) == 1/k/(k+1) + # This is compatible with zero size leading to zero bits. + self.bits += math.log(size * (size + 1), 2) + if prev_avg is None: + # Avg is considered to be uniformly distributed + # from zero to max_value. + self.bits += math.log(max_value + 1.0, 2) + else: + # Opposite triangle distribution with minimum. + self.bits += math.log( + max_value * (max_value + 1) / (abs(avg - prev_avg) + 1), 2) + if self.size < 2: + return + # Stdev is considered to be uniformly distributed + # from zero to max_value. That is quite a bad expectation, + # but resilient to negative samples etc. + self.bits += math.log(max_value + 1.0, 2) + # Now we know the samples lie on sphere in size-1 dimensions. + # So it is (size-2)-sphere, with radius^2 == stdev^2 * size. + # https://en.wikipedia.org/wiki/N-sphere + sphere_area_ln = math.log(2) + math.log(math.pi) * ((size - 1) / 2.0) + sphere_area_ln -= math.lgamma((size - 1) / 2.0) + sphere_area_ln += math.log(stdev + 1.0) * (size - 2) + sphere_area_ln += math.log(size) * ((size - 2) / 2.0) + self.bits += sphere_area_ln / math.log(2) + + def __str__(self): + """Return string with human readable description of the group. + + :returns: Readable description. + :rtype: str + """ + return ( + f"size={self.size} avg={self.avg} stdev={self.stdev}" + f" bits={self.bits}" + ) + + def __repr__(self): + """Return string executable as Python constructor call. + + :returns: Executable constructor call. + :rtype: str + """ + return ( + f"BitCountingStats(size={self.size!r},avg={self.avg!r}" + f",stdev={self.stdev!r},max_value={self.max_value!r}" + f",prev_avg={self.prev_avg!r})" + ) + + @classmethod + def for_runs(cls, runs, max_value=None, prev_avg=None): + """Return new stats instance describing the sequence of runs. + + If you want to append data to existing stats object, + you can simply use the stats object as the first run. + + Instead of a verb, "for" is used to start this method name, + to signify the result contains less information than the input data. + + The two optional values can come from outside of the runs provided. + + The max_value cannot be None for non-zero size data. + The implementation does not check if no datapoint exceeds max_value. + + TODO: Document the behavior for zero size result. + + :param runs: Sequence of data to describe by the new metadata. + :param max_value: Maximal expected value. + :param prev_avg: Population average of the previous group, if any. + :type runs: Iterable[Union[float, AvgStdevStats]] + :type max_value: Union[float, NoneType] + :type prev_avg: Union[float, NoneType] + :returns: The new stats instance. + :rtype: cls + """ + asd = AvgStdevStats.for_runs(runs) + ret_obj = cls(size=asd.size, avg=asd.avg, stdev=asd.stdev, + max_value=max_value, prev_avg=prev_avg) + return ret_obj diff --git a/resources/libraries/python/jumpavg/__init__.py b/resources/libraries/python/jumpavg/__init__.py new file mode 100644 index 0000000000..cb8b3df43d --- /dev/null +++ b/resources/libraries/python/jumpavg/__init__.py @@ -0,0 +1,22 @@ +# Copyright (c) 2019 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +__init__ file for "jumpavg" Python package. +""" + +from .AvgStdevStats import AvgStdevStats +from .BitCountingStats import BitCountingStats +from .BitCountingGroup import BitCountingGroup +from .BitCountingGroupList import BitCountingGroupList +from .classify import classify diff --git a/resources/libraries/python/jumpavg/classify.py b/resources/libraries/python/jumpavg/classify.py new file mode 100644 index 0000000000..5f5ce6160c --- /dev/null +++ b/resources/libraries/python/jumpavg/classify.py @@ -0,0 +1,76 @@ +# Copyright (c) 2019 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Module holding the classify function + +Classification os one of primary purposes of this package. + +Minimal message length principle is used +for grouping results into the list of groups, +assuming each group is a population of different Gaussian distribution. +""" + +from .AvgStdevStats import AvgStdevStats +from .BitCountingGroupList import BitCountingGroupList + + +def classify(values): + """Return the values in groups of optimal bit count. + + Here, a value is either a float, or an iterable of floats. + Such iterables represent an undivisible sequence of floats. + + Internally, such sequence is replaced by AvgStdevStats + after maximal value is found. + + :param values: Sequence of runs to classify. + :type values: Iterable[Union[float, Iterable[float]]] + :returns: Classified group list. + :rtype: BitCountingGroupList + """ + processed_values = list() + max_value = 0.0 + for value in values: + if isinstance(value, (float, int)): + if value > max_value: + max_value = value + processed_values.append(value) + else: + for subvalue in value: + if subvalue > max_value: + max_value = subvalue + processed_values.append(AvgStdevStats.for_runs(value)) + open_at = list() + closed_before = [BitCountingGroupList(max_value=max_value)] + for index, value in enumerate(processed_values): + newly_open = closed_before[index].copy() + newly_open.append_group_of_runs([value]) + open_at.append(newly_open) + record_group_list = newly_open + for previous_index, old_open in enumerate(open_at[:index]): + new_open = old_open.copy().append_run_to_to_last_group(value) + open_at[previous_index] = new_open + if new_open.bits < record_group_list.bits: + record_group_list = new_open + closed_before.append(record_group_list) + partition = closed_before[-1] + previous_average = partition[0].stats.avg + for group in partition: + if group.stats.avg == previous_average: + group.comment = u"normal" + elif group.stats.avg < previous_average: + group.comment = u"regression" + elif group.stats.avg > previous_average: + group.comment = u"progression" + previous_average = group.stats.avg + return partition |