From 6149ec451efff00068f38e3343e66cdec7b943f4 Mon Sep 17 00:00:00 2001 From: Vratko Polak Date: Thu, 14 Jun 2018 14:04:03 +0200 Subject: CSIT-1110: Use jumpavg library from pip + Move the jumpavg library code to separate directory. - Bump to 0.1.2 has to be done later. Change-Id: I9722ede48f00e99eeb68ca3f91e0bdeee2937973 Signed-off-by: Vratko Polak --- PyPI/jumpavg/.gitignore | 104 +++++++++++ PyPI/jumpavg/LICENSE.txt | 201 +++++++++++++++++++++ PyPI/jumpavg/MANIFEST.in | 6 + PyPI/jumpavg/README.rst | 28 +++ PyPI/jumpavg/jumpavg/AbstractGroupClassifier.py | 40 ++++ PyPI/jumpavg/jumpavg/AbstractGroupMetadata.py | 42 +++++ PyPI/jumpavg/jumpavg/AvgStdevMetadata.py | 54 ++++++ PyPI/jumpavg/jumpavg/AvgStdevMetadataFactory.py | 51 ++++++ PyPI/jumpavg/jumpavg/BitCountingClassifier.py | 70 +++++++ PyPI/jumpavg/jumpavg/BitCountingGroup.py | 50 +++++ PyPI/jumpavg/jumpavg/BitCountingGroupList.py | 87 +++++++++ PyPI/jumpavg/jumpavg/BitCountingMetadata.py | 109 +++++++++++ PyPI/jumpavg/jumpavg/BitCountingMetadataFactory.py | 85 +++++++++ .../jumpavg/ClassifiedBitCountingMetadata.py | 73 ++++++++ PyPI/jumpavg/jumpavg/ClassifiedMetadataFactory.py | 42 +++++ PyPI/jumpavg/jumpavg/RunGroup.py | 34 ++++ PyPI/jumpavg/jumpavg/__init__.py | 16 ++ PyPI/jumpavg/setup.cfg | 7 + PyPI/jumpavg/setup.py | 52 ++++++ 19 files changed, 1151 insertions(+) create mode 100644 PyPI/jumpavg/.gitignore create mode 100644 PyPI/jumpavg/LICENSE.txt create mode 100644 PyPI/jumpavg/MANIFEST.in create mode 100644 PyPI/jumpavg/README.rst create mode 100644 PyPI/jumpavg/jumpavg/AbstractGroupClassifier.py create mode 100644 PyPI/jumpavg/jumpavg/AbstractGroupMetadata.py create mode 100644 PyPI/jumpavg/jumpavg/AvgStdevMetadata.py create mode 100644 PyPI/jumpavg/jumpavg/AvgStdevMetadataFactory.py create mode 100644 PyPI/jumpavg/jumpavg/BitCountingClassifier.py create mode 100644 PyPI/jumpavg/jumpavg/BitCountingGroup.py create mode 100644 PyPI/jumpavg/jumpavg/BitCountingGroupList.py create mode 100644 
PyPI/jumpavg/jumpavg/BitCountingMetadata.py create mode 100644 PyPI/jumpavg/jumpavg/BitCountingMetadataFactory.py create mode 100644 PyPI/jumpavg/jumpavg/ClassifiedBitCountingMetadata.py create mode 100644 PyPI/jumpavg/jumpavg/ClassifiedMetadataFactory.py create mode 100644 PyPI/jumpavg/jumpavg/RunGroup.py create mode 100644 PyPI/jumpavg/jumpavg/__init__.py create mode 100644 PyPI/jumpavg/setup.cfg create mode 100644 PyPI/jumpavg/setup.py (limited to 'PyPI/jumpavg') diff --git a/PyPI/jumpavg/.gitignore b/PyPI/jumpavg/.gitignore new file mode 100644 index 0000000000..894a44cc06 --- /dev/null +++ b/PyPI/jumpavg/.gitignore @@ -0,0 +1,104 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ diff --git a/PyPI/jumpavg/LICENSE.txt b/PyPI/jumpavg/LICENSE.txt new file mode 100644 index 0000000000..261eeb9e9f --- /dev/null +++ b/PyPI/jumpavg/LICENSE.txt @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/PyPI/jumpavg/MANIFEST.in b/PyPI/jumpavg/MANIFEST.in new file mode 100644 index 0000000000..58073271d1 --- /dev/null +++ b/PyPI/jumpavg/MANIFEST.in @@ -0,0 +1,6 @@ + +# Include the README +include README.rst + +# Include the license file +include LICENSE.txt diff --git a/PyPI/jumpavg/README.rst b/PyPI/jumpavg/README.rst new file mode 100644 index 0000000000..2bdde2b9d0 --- /dev/null +++ b/PyPI/jumpavg/README.rst @@ -0,0 +1,28 @@ +Jumpavg library +=============== + +Origins +------- + +This library was developed as anomaly detection logic +for PAL_ (Presentation and Analytics Layer) +of CSIT_ (Continuous System Integration and Testing) +project of fd.io_ (Fast Data), one of LFN_ +(Linux Foundation Networking) projects. + +In order to make this code available in PyPI_ (Python Package Index), +the setuptools stuff has been added, +and the code has been moved into a separate directory_, +in order not to interfere with the otherwise tightly coupled CSIT code. + +Usage +----- + +TODO. + +.. _PAL: https://wiki.fd.io/view/CSIT/Design_Optimizations#Presentation_and_Analytics_Layer +.. _CSIT: https://wiki.fd.io/view/CSIT +.. _fd.io: https://fd.io/ +.. _LFN: https://www.linuxfoundation.org/projects/networking/ +.. _PyPI: https://pypi.org/ +..
class AbstractGroupClassifier(ABCMeta("_AbstractBase", (object,), {})):
    """Abstract class defining API for classifier.

    The classifier is an object with classify() method
    which divides data into groups containing metadata.

    The base class is created by calling ABCMeta directly, so the metaclass
    takes effect on both Python 2 and Python 3. A ``__metaclass__`` attribute
    alone is honoured only by Python 2; Python 3 ignores it, which would
    leave the abstract method unenforced.
    """

    # Kept so code inspecting the attribute keeps working;
    # the metaclass itself comes from the base class above.
    __metaclass__ = ABCMeta

    @abstractmethod
    def classify(self, values):
        """Divide values into consecutive groups with metadata.

        The metadata does not need to follow any specific rules,
        although progression/regression/outlier description would be fine.

        :param values: Sequence of runs to classify.
        :type values: Iterable of float or of AvgStdevMetadata
        :returns: Classified groups
        :rtype: Iterable of RunGroup
        """
class AbstractGroupMetadata(ABCMeta("_AbstractBase", (object,), {})):
    """Abstract class defining API for metadata.

    At this level, only __str__() and __repr__() methods are required.

    The base class is created by calling ABCMeta directly, so the metaclass
    takes effect on both Python 2 and Python 3. A ``__metaclass__`` attribute
    alone is honoured only by Python 2; Python 3 ignores it, which would
    leave the abstract methods unenforced.
    """

    # Kept so code inspecting the attribute keeps working;
    # the metaclass itself comes from the base class above.
    __metaclass__ = ABCMeta

    @abstractmethod
    def __str__(self):
        """Return string with human readable description of the group.

        :returns: Readable description.
        :rtype: str
        """

    @abstractmethod
    def __repr__(self):
        """Return string executable as Python constructor call.

        :returns: Executable constructor call.
        :rtype: str
        """
class AvgStdevMetadata(AbstractGroupMetadata):
    """Group metadata made of sample count, average and standard deviation."""

    def __init__(self, size=0, avg=0.0, stdev=0.0):
        """Store the statistics, falling back to zeros where they make no sense.

        A negative size is stored as zero, the average is kept only
        for at least one sample and the standard deviation only
        for at least two samples, so faulty callers do not cause math errors.

        :param size: Number of values participating in this group.
        :param avg: Population average of the participating sample values.
        :param stdev: Population standard deviation of the sample values.
        :type size: int
        :type avg: float
        :type stdev: float
        """
        # Start from the sanitized fallbacks, then accept what is valid.
        self.size, self.avg, self.stdev = 0, 0.0, 0.0
        if size >= 0:
            self.size = size
        if size >= 1:
            self.avg = avg
        if size >= 2:
            self.stdev = stdev

    def __str__(self):
        """Describe the group in a human readable way.

        :returns: Readable description.
        :rtype: str
        """
        fields = {"size": self.size, "avg": self.avg, "stdev": self.stdev}
        return "size={size} avg={avg} stdev={stdev}".format(**fields)

    def __repr__(self):
        """Describe the group as a Python constructor call.

        :returns: Executable constructor call.
        :rtype: str
        """
        fields = {"size": self.size, "avg": self.avg, "stdev": self.stdev}
        template = "AvgStdevMetadata(size={size},avg={avg},stdev={stdev})"
        return template.format(**fields)
class AvgStdevMetadataFactory(object):
    """Class factory which creates avg,stdev metadata from data."""

    @staticmethod
    def from_data(values):
        """Return new metadata object fitting the values.

        Plain values count as one sample each. An AvgStdevMetadata item
        counts as value.size samples with the given average and
        population standard deviation, so already summarized groups
        can be merged without access to their original samples.

        :param values: Run values to be processed.
        :type values: Iterable of float or of AvgStdevMetadata
        :returns: The metadata matching the values.
        :rtype: AvgStdevMetadata
        """
        # Running sums of the zeroth, first and second power of the samples.
        sum_0 = 0
        sum_1 = 0.0
        sum_2 = 0.0
        for value in values:
            if isinstance(value, AvgStdevMetadata):
                sum_0 += value.size
                sum_1 += value.avg * value.size
                # Second moment of a group is (stdev^2 + avg^2) * size.
                sum_2 += value.stdev * value.stdev * value.size
                sum_2 += value.avg * value.avg * value.size
            else:  # The value is assumed to be float.
                sum_0 += 1
                sum_1 += value
                sum_2 += value * value
        if sum_0 < 1:
            return AvgStdevMetadata()
        avg = sum_1 / sum_0
        variance = sum_2 / sum_0 - avg * avg
        # Rounding errors can push the variance of (nearly) identical samples
        # slightly below zero, which would make math.sqrt raise ValueError.
        # The comparison form leaves a NaN variance untouched.
        if variance < 0.0:
            variance = 0.0
        stdev = math.sqrt(variance)
        return AvgStdevMetadata(size=sum_0, avg=avg, stdev=stdev)
class BitCountingClassifier(AbstractGroupClassifier):
    """Classifier using Minimal Description Length principle."""

    def classify(self, values):
        """Return the values in groups of optimal bit count.

        The current implementation could be a static method,
        but we might support options in later versions,
        for example for choosing encodings.

        For every index, closed_before holds the cheapest group list
        covering all values before that index, and opened_at holds,
        per start index of the last group, the group list whose last group
        starts there and has been extended by every later value so far.
        The group list with the fewest bits after the last value wins.

        Each group of the winning partition is then classified by comparing
        its average with the average of the previous group. The first group
        is compared with its own average, so it is always "normal".

        :param values: Sequence of runs to classify.
        :type values: Iterable of float or of AvgStdevMetadata
        :returns: Classified group list, empty if there are no values.
        :rtype: BitCountingGroupList
        """
        # The values are traversed twice (maximum search, then grouping),
        # so a one-shot iterable has to be materialized first.
        values = list(values)
        if not values:
            # Nothing to group; also avoids indexing an empty partition below.
            return BitCountingGroupList()
        max_value = BitCountingMetadataFactory.find_max_value(values)
        factory = BitCountingMetadataFactory(max_value)
        opened_at = []
        closed_before = [BitCountingGroupList()]
        for index, value in enumerate(values):
            # Candidate: the value starts a new group after the best
            # partition of all the previous values.
            singleton = BitCountingGroup(factory, [value])
            newly_opened = closed_before[index].with_group_appended(singleton)
            opened_at.append(newly_opened)
            record_group_list = newly_opened
            # Candidates: the value extends a last group opened earlier.
            for previous in range(index):
                still_opened = (
                    opened_at[previous].with_value_added_to_last_group(value))
                opened_at[previous] = still_opened
                if still_opened.bits < record_group_list.bits:
                    record_group_list = still_opened
            closed_before.append(record_group_list)
        partition = closed_before[-1]
        previous_average = partition[0].metadata.avg
        for group in partition:
            average = group.metadata.avg
            if average == previous_average:
                classification = "normal"
            elif average < previous_average:
                classification = "regression"
            elif average > previous_average:
                classification = "progression"
            else:
                # Incomparable averages (e.g. NaN) stay unclassified.
                classification = None
            if classification is not None:
                group.metadata = ClassifiedMetadataFactory.with_classification(
                    group.metadata, classification)
            previous_average = group.metadata.avg
        return partition
class BitCountingGroup(RunGroup):
    """RunGroup with BitCountingMetadata.

    Support with_run_added() method to simplify extending the group.
    As bit content has to be re-counted, metadata factory is stored.
    """

    def __init__(self, metadata_factory, values=None):
        """Create the group from metadata factory and values.

        :param metadata_factory: Factory object to create metadata with.
        :param values: The runs belonging to this group.
            None (the default) means a new empty list.
        :type metadata_factory: BitCountingMetadataFactory
        :type values: Iterable of float or of AvgStdevMetadata, or None
        """
        # A literal [] default would be one list object shared by every
        # group created without explicit values.
        if values is None:
            values = []
        self.metadata_factory = metadata_factory
        metadata = metadata_factory.from_data(values)
        super(BitCountingGroup, self).__init__(metadata, values)

    def with_run_added(self, value):
        """Create and return a new group with one more run than self.

        The values of self are copied, so self is left unchanged.

        :param value: The run value to add to the group.
        :type value: float or AvgStdevMetadata
        :returns: New group with the run added.
        :rtype: BitCountingGroup
        """
        # NOTE(review): self.values is presumably set by RunGroup.__init__,
        # which is not visible here - confirm.
        values = list(self.values)
        values.append(value)
        # TODO: Is there a good way to save some computation
        # by copy&updating the metadata incrementally?
        return BitCountingGroup(self.metadata_factory, values)
class BitCountingGroupList(list):
    """List of BitCountingGroup which tracks overall bit count.

    This is useful, as bit count of a subsequent group
    depends on average of the previous group.
    Having the logic encapsulated here spares the caller
    the effort to pass averages around.

    Method with_value_added_to_last_group() delegates to BitCountingGroup,
    with_group_appended() adds new group with recalculated bits.

    TODO: last_group.metadata_factory.max_value in with_group_appended()
    is ugly, find a more natural class design.
    """

    def __init__(self, group_list=None, bits=None):
        """Create a group list from given list of groups.

        :param group_list: Groups to compose this group list from.
            None (the default) means no groups.
        :param bits: Bit count if known, else None.
        :type group_list: Iterable of BitCountingGroup, or None
        :type bits: float or None
        """
        super(BitCountingGroupList, self).__init__(
            () if group_list is None else group_list)
        if bits is None:
            # Sum over self, not over the argument: a one-shot iterable
            # has already been consumed by the list constructor above.
            bits = sum((group.metadata.bits for group in self), 0.0)
        self.bits = bits

    def with_group_appended(self, group):
        """Create and return new group list with given group more than self.

        The group argument object is updated with derivative metadata:
        unless self is empty, its metadata factory and metadata are replaced
        by ones that take the average of the current last group into account.

        :param group: Next group to be appended to the group list.
        :type group: BitCountingGroup
        :returns: New group list with added group.
        :rtype: BitCountingGroupList
        """
        group_list = list(self)
        if group_list:
            last_group = group_list[-1]
            factory = BitCountingMetadataFactory(
                last_group.metadata_factory.max_value, last_group.metadata.avg)
            group.metadata_factory = factory
            group.metadata = factory.from_data(group.values)
        group_list.append(group)
        bits = self.bits + group.metadata.bits
        return BitCountingGroupList(group_list, bits)

    def with_value_added_to_last_group(self, value):
        """Create and return new group list with value added to last group.

        :param value: The run value to add to the last group.
        :type value: float or AvgStdevMetadata
        :returns: New group list with the last group updated.
        :rtype: BitCountingGroupList
        :raises IndexError: If this group list has no group to add to.
        """
        if not self:
            raise IndexError(
                "Cannot add a value, the group list contains no group.")
        group_list = list(self)
        last_group = group_list[-1]
        bits_before = last_group.metadata.bits
        last_group = last_group.with_run_added(value)
        group_list[-1] = last_group
        # Only the last group changed, so swap its contribution to the total.
        bits = self.bits - bits_before + last_group.metadata.bits
        return BitCountingGroupList(group_list, bits)
class BitCountingMetadata(AvgStdevMetadata):
    """Metadata which also carries the information content of a group.

    The information content (bit count) is estimated under the assumption
    that the data consists of independent random values
    from a normal distribution.
    """

    def __init__(self, max_value, size=0, avg=0.0, stdev=0.0, prev_avg=None):
        """Store the arguments and compute the bit count from them.

        The bit count is not real, as that would depend on numeric precision
        (number of significant bits in values).
        The difference is assumed to be constant per value,
        which is consistent with Gauss distribution
        (but not with floating point mechanic).
        The hope is the difference will have
        no real impact on the classification procedure.

        An empty group has zero bits. A group of one value only counts
        the bits for the group length and for the average.

        :param max_value: Maximal expected value.
            TODO: This might be more optimal,
            but max-invariant algorithm will be nicer.
        :param size: Number of values participating in this group.
        :param avg: Population average of the participating sample values.
        :param stdev: Population standard deviation of the sample values.
        :param prev_avg: Population average of the previous group.
            If None, no previous average is taken into account.
            If not None, the given previous average is used to discourage
            consecutive groups with similar averages
            (opposite triangle distribution is assumed).
        :type max_value: float
        :type size: int
        :type avg: float
        :type stdev: float
        :type prev_avg: float or None
        """
        super(BitCountingMetadata, self).__init__(size, avg, stdev)
        self.max_value = max_value
        self.prev_avg = prev_avg
        self.bits = 0.0
        if self.size < 1:
            return
        # The sequence length has to be encoded too,
        # otherwise the message would not be decodable.
        # Model: probability of k samples is 1/k - 1/(k+1)
        # == 1/k/(k+1)
        length_bits = math.log(size * (size + 1), 2)
        self.bits += length_bits
        if prev_avg is not None:
            # Opposite triangle distribution with minimum.
            avg_distance = abs(avg - prev_avg) + 1
            avg_bits = math.log(max_value * (max_value + 1) / avg_distance, 2)
        else:
            # Avg is considered to be uniformly distributed
            # from zero to max_value.
            avg_bits = math.log(max_value + 1.0, 2)
        self.bits += avg_bits
        if self.size < 2:
            return
        # Stdev is considered to be uniformly distributed
        # from zero to max_value. That is quite a bad expectation,
        # but resilient to negative samples etc.
        stdev_bits = math.log(max_value + 1.0, 2)
        self.bits += stdev_bits
        # Now we know the samples lie on sphere in size-1 dimensions.
        # So it is (size-2)-sphere, with radius^2 == stdev^2 * size.
        # https://en.wikipedia.org/wiki/N-sphere
        half_dimension = (size - 1) / 2.0
        area_ln = math.log(2) + math.log(math.pi) * half_dimension
        area_ln -= math.lgamma(half_dimension)
        area_ln += math.log(stdev + 1.0) * (size - 2)
        area_ln += math.log(size) * ((size - 2) / 2.0)
        # Convert from natural logarithm to bits.
        self.bits += area_ln / math.log(2)

    def __str__(self):
        """Return string with human readable description of the group.

        :returns: Readable description listing size, avg, stdev and bits.
        :rtype: str
        """
        template = "size={size} avg={avg} stdev={stdev} bits={bits}"
        return template.format(
            size=self.size, avg=self.avg, stdev=self.stdev, bits=self.bits)

    def __repr__(self):
        """Return string executable as Python constructor call.

        :returns: Executable constructor call.
        :rtype: str
        """
        template = "".join((
            "BitCountingMetadata(max_value={max_value},size={size},",
            "avg={avg},stdev={stdev},prev_avg={prev_avg})"))
        fields = dict(
            max_value=self.max_value, size=self.size, avg=self.avg,
            stdev=self.stdev, prev_avg=self.prev_avg)
        return template.format(**fields)
+ :rtype: float + """ + max_value = 0.0 + for value in values: + if isinstance(value, AvgStdevMetadata): + value = value.avg + if value > max_value: + max_value = value + return max_value + + def __init__(self, max_value, prev_avg=None): + """Construct the factory instance with given arguments. + + :param max_value: Maximal expected value. + :param prev_avg: Population average of the previous group. + If None, no previous average is taken into account. + If not None, the given previous average is used to discourage + consecutive groups with similar averages + (opposite triangle distribution is assumed). + :type max_value: float + :type prev_avg: float or None + """ + self.max_value = max_value + self.prev_avg = prev_avg + + def from_avg_stdev_metadata(self, metadata): + """Return new metadata object by adding bits to existing metadata. + + :param metadata: Metadata to count bits for. + :type metadata: AvgStdevMetadata + :returns: The metadata with bits counted. + :rtype: BitCountingMetadata + """ + return BitCountingMetadata( + max_value=self.max_value, size=metadata.size, + avg=metadata.avg, stdev=metadata.stdev, prev_avg=self.prev_avg) + + def from_data(self, values): + """Return new metadata object fitting the values. + + :param values: Run values to be processed. + :type values: Iterable of float or of AvgStdevMetadata + :returns: The metadata matching the values. + :rtype: BitCountingMetadata + """ + metadata = AvgStdevMetadataFactory.from_data(values) + return self.from_avg_stdev_metadata(metadata) diff --git a/PyPI/jumpavg/jumpavg/ClassifiedBitCountingMetadata.py b/PyPI/jumpavg/jumpavg/ClassifiedBitCountingMetadata.py new file mode 100644 index 0000000000..29359f0908 --- /dev/null +++ b/PyPI/jumpavg/jumpavg/ClassifiedBitCountingMetadata.py @@ -0,0 +1,73 @@ +# Copyright (c) 2018 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
class ClassifiedBitCountingMetadata(BitCountingMetadata):
    """Bit counting metadata extended with a classification attribute.

    TODO: Can we create ClassifiedMetadata and inherit (also) from that?
    """

    def __init__(
            self, max_value, size=0, avg=0.0, stdev=0.0, prev_avg=None,
            classification=None):
        """Delegate to ancestor constructors and set classification.

        :param max_value: Maximal expected value.
        :param size: Number of values participating in this group.
        :param avg: Population average of the participating sample values.
        :param stdev: Population standard deviation of the sample values.
        :param prev_avg: Population average of the previous group.
            If None, no previous average is taken into account.
            If not None, the given previous average is used to discourage
            consecutive groups with similar averages
            (opposite triangle distribution is assumed).
        :param classification: Arbitrary object classifying this group.
        :type max_value: float
        :type size: int
        :type avg: float
        :type stdev: float
        :type prev_avg: float or None
        :type classification: object
        """
        parent = super(ClassifiedBitCountingMetadata, self)
        parent.__init__(max_value, size, avg, stdev, prev_avg)
        self.classification = classification

    def __str__(self):
        """Return string with human readable description of the group.

        The ancestor description is extended by the classification.

        :returns: Readable description.
        :rtype: str
        """
        # Call __str__ on the super proxy explicitly,
        # str(super(...)) would describe the proxy, not the proxied object.
        base_text = super(ClassifiedBitCountingMetadata, self).__str__()
        suffix = " classification={classification}".format(
            classification=self.classification)
        return base_text + suffix

    def __repr__(self):
        """Return string executable as Python constructor call.

        :returns: Executable constructor call.
        :rtype: str
        """
        template = "".join((
            "ClassifiedBitCountingMetadata(max_value={max_value},",
            "size={size},avg={avg},stdev={stdev},prev_avg={prev_avg},",
            "classification={cls})"))
        fields = dict(
            max_value=self.max_value, size=self.size, avg=self.avg,
            stdev=self.stdev, prev_avg=self.prev_avg,
            cls=self.classification)
        return template.format(**fields)
class RunGroup(object):
    """Effectively a named tuple of data and metadata.

    TODO: This feels like an abstract class.
    Most uses assume restrictions on metadata type.
    Can this be defined similarly to C++ templates?
    """

    def __init__(self, metadata, values):
        """Create the group from metadata and values.

        Both arguments are stored as given, no copy is made.

        :param metadata: Metadata object to associate with the group.
        :param values: The runs belonging to this group.
        :type metadata: AbstractGroupMetadata
        :type values: Iterable of float or of AvgStdevMetadata
        """
        self.metadata, self.values = metadata, values
+ description="Library for finding changes in time series by grouping results.", + long_description=long_description, + long_description_content_type="text/x-rst", + # TODO: Create a separate webpage for jumpavg library. + url="https://gerrit.fd.io/r/gitweb?p=csit.git;a=tree;f=PyPI/jumpavg;hb=refs/heads/master", + author="Cisco Systems Inc. and/or its affiliates", + author_email="csit-dev@lists.fd.io", + classifiers=[ + "Development Status :: 3 - Alpha", + "Intended Audience :: Science/Research", + # Pick your license as you wish + "License :: OSI Approved :: Apache Software License", + "Natural Language :: English", + # TODO: Test which Python versions is the code compatible with. + "Programming Language :: Python :: 2.7", + "Topic :: Scientific/Engineering :: Information Analysis" + ], + keywords="progression regression anomaly detection", + packages=find_packages(exclude=[]), + # TODO: python_requires="~=2.7" + install_requires=[], + # TODO: Include simulator and tests. + extras_require={ + }, + package_data={ + }, + entry_points={ + "console_scripts": [ + ], + }, + project_urls={ + "Bug Reports": "https://jira.fd.io/projects/CSIT/issues", + "Source": "https://gerrit.fd.io/r/gitweb?p=csit.git;a=tree;f=PyPI/jumpavg;hb=refs/heads/master", + }, +) -- cgit 1.2.3-korg