From 627cddca1d64edb8475407a1524efb2a22249a25 Mon Sep 17 00:00:00 2001 From: Vratko Polak Date: Mon, 2 Dec 2019 18:38:44 +0100 Subject: [PATCH] Refactor jumpavg to be more readable and usable This is the python3 change, the python2 is still used for testing bisect. + New version is 0.2.0 due to amount of API changes. + Jumpavg is now part of CSIT resource package tree. + Perpatch migrated to new jumpavg. - PAL NOT updated (the update moved to a different Change). Change-Id: I7d7a8bf8a411196c20c2a40a8c64478d6709bc07 Signed-off-by: Vratko Polak --- PyPI/jumpavg/jumpavg | 1 + PyPI/jumpavg/jumpavg/AbstractGroupClassifier.py | 40 ----- PyPI/jumpavg/jumpavg/AbstractGroupMetadata.py | 42 ----- PyPI/jumpavg/jumpavg/AvgStdevMetadata.py | 54 ------ PyPI/jumpavg/jumpavg/AvgStdevMetadataFactory.py | 54 ------ PyPI/jumpavg/jumpavg/BitCountingClassifier.py | 70 -------- PyPI/jumpavg/jumpavg/BitCountingGroup.py | 50 ------ PyPI/jumpavg/jumpavg/BitCountingGroupList.py | 87 ---------- PyPI/jumpavg/jumpavg/BitCountingMetadata.py | 109 ------------ PyPI/jumpavg/jumpavg/BitCountingMetadataFactory.py | 85 ---------- .../jumpavg/ClassifiedBitCountingMetadata.py | 73 -------- PyPI/jumpavg/jumpavg/ClassifiedMetadataFactory.py | 42 ----- PyPI/jumpavg/jumpavg/RunGroup.py | 34 ---- PyPI/jumpavg/setup.py | 50 +++--- resources/libraries/bash/function/per_patch.sh | 7 +- .../libraries/python/jumpavg/AvgStdevStats.py | 113 +++++++++++++ .../libraries/python/jumpavg/BitCountingGroup.py | 173 +++++++++++++++++++ .../python/jumpavg/BitCountingGroupList.py | 185 +++++++++++++++++++++ .../libraries/python/jumpavg/BitCountingStats.py | 169 +++++++++++++++++++ .../libraries/python}/jumpavg/__init__.py | 8 +- resources/libraries/python/jumpavg/classify.py | 76 +++++++++ resources/tools/doc_gen/.gitignore | 1 + .../{scripts => integrated}/compare_perpatch.py | 48 +++--- resources/tools/scripts/perpatch_requirements.txt | 5 +- 24 files changed, 787 insertions(+), 789 deletions(-) create mode 120000 
PyPI/jumpavg/jumpavg delete mode 100644 PyPI/jumpavg/jumpavg/AbstractGroupClassifier.py delete mode 100644 PyPI/jumpavg/jumpavg/AbstractGroupMetadata.py delete mode 100644 PyPI/jumpavg/jumpavg/AvgStdevMetadata.py delete mode 100644 PyPI/jumpavg/jumpavg/AvgStdevMetadataFactory.py delete mode 100644 PyPI/jumpavg/jumpavg/BitCountingClassifier.py delete mode 100644 PyPI/jumpavg/jumpavg/BitCountingGroup.py delete mode 100644 PyPI/jumpavg/jumpavg/BitCountingGroupList.py delete mode 100644 PyPI/jumpavg/jumpavg/BitCountingMetadata.py delete mode 100644 PyPI/jumpavg/jumpavg/BitCountingMetadataFactory.py delete mode 100644 PyPI/jumpavg/jumpavg/ClassifiedBitCountingMetadata.py delete mode 100644 PyPI/jumpavg/jumpavg/ClassifiedMetadataFactory.py delete mode 100644 PyPI/jumpavg/jumpavg/RunGroup.py create mode 100644 resources/libraries/python/jumpavg/AvgStdevStats.py create mode 100644 resources/libraries/python/jumpavg/BitCountingGroup.py create mode 100644 resources/libraries/python/jumpavg/BitCountingGroupList.py create mode 100644 resources/libraries/python/jumpavg/BitCountingStats.py rename {PyPI/jumpavg => resources/libraries/python}/jumpavg/__init__.py (68%) create mode 100644 resources/libraries/python/jumpavg/classify.py rename resources/tools/{scripts => integrated}/compare_perpatch.py (73%) diff --git a/PyPI/jumpavg/jumpavg b/PyPI/jumpavg/jumpavg new file mode 120000 index 0000000000..df32002d6b --- /dev/null +++ b/PyPI/jumpavg/jumpavg @@ -0,0 +1 @@ +../../resources/libraries/python/jumpavg \ No newline at end of file diff --git a/PyPI/jumpavg/jumpavg/AbstractGroupClassifier.py b/PyPI/jumpavg/jumpavg/AbstractGroupClassifier.py deleted file mode 100644 index 2612b009da..0000000000 --- a/PyPI/jumpavg/jumpavg/AbstractGroupClassifier.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) 2018 Cisco and/or its affiliates. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Module holding AbstractGroupClassifier class.""" - -from abc import ABCMeta, abstractmethod - - -class AbstractGroupClassifier(object): - """Abstract class defining API for classifier. - - The classifier is an object with classify() method - which divides data into groups containing metadata. - """ - - __metaclass__ = ABCMeta - - @abstractmethod - def classify(self, values): - """Divide values into consecutive groups with metadata. - - The metadata does not need to follow any specific rules, - although progression/regression/outlier description would be fine. - - :param values: Sequence of runs to classify. - :type values: Iterable of float or of AvgStdevMetadata - :returns: Classified groups - :rtype: Iterable of RunGroup - """ - pass diff --git a/PyPI/jumpavg/jumpavg/AbstractGroupMetadata.py b/PyPI/jumpavg/jumpavg/AbstractGroupMetadata.py deleted file mode 100644 index 3235dbd485..0000000000 --- a/PyPI/jumpavg/jumpavg/AbstractGroupMetadata.py +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright (c) 2018 Cisco and/or its affiliates. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -"""Module holding AbstractGroupMetadata class.""" - -from abc import ABCMeta, abstractmethod - - -class AbstractGroupMetadata(object): - """Abstract classdefining API for metadata. - - At this level, only __str__() and __repr() methods are required.""" - - __metaclass__ = ABCMeta - - @abstractmethod - def __str__(self): - """Return string with human readable description of the group. - - :returns: Readable description. - :rtype: str - """ - pass - - @abstractmethod - def __repr__(self): - """Return string executable as Python constructor call. - - :returns: Executable constructor call. - :rtype: str - """ - pass diff --git a/PyPI/jumpavg/jumpavg/AvgStdevMetadata.py b/PyPI/jumpavg/jumpavg/AvgStdevMetadata.py deleted file mode 100644 index efc1a90cd4..0000000000 --- a/PyPI/jumpavg/jumpavg/AvgStdevMetadata.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (c) 2018 Cisco and/or its affiliates. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Module for holding AvgStdevMetadata class.""" - -from AbstractGroupMetadata import AbstractGroupMetadata - - -class AvgStdevMetadata(AbstractGroupMetadata): - """Class for metadata specifying the average and standard deviation.""" - - def __init__(self, size=0, avg=0.0, stdev=0.0): - """Construct the metadata by setting the values needed. - - The values are sanitized, so faulty callers to not cause math errors. 
- - :param size: Number of values participating in this group. - :param avg: Population average of the participating sample values. - :param stdev: Population standard deviation of the sample values. - :type size: int - :type avg: float - :type stdev: float - """ - self.size = size if size >= 0 else 0 - self.avg = avg if size >= 1 else 0.0 - self.stdev = stdev if size >= 2 else 0.0 - - def __str__(self): - """Return string with human readable description of the group. - - :returns: Readable description. - :rtype: str - """ - return "size={size} avg={avg} stdev={stdev}".format( - size=self.size, avg=self.avg, stdev=self.stdev) - - def __repr__(self): - """Return string executable as Python constructor call. - - :returns: Executable constructor call. - :rtype: str - """ - return "AvgStdevMetadata(size={size},avg={avg},stdev={stdev})".format( - size=self.size, avg=self.avg, stdev=self.stdev) diff --git a/PyPI/jumpavg/jumpavg/AvgStdevMetadataFactory.py b/PyPI/jumpavg/jumpavg/AvgStdevMetadataFactory.py deleted file mode 100644 index 25bc600aeb..0000000000 --- a/PyPI/jumpavg/jumpavg/AvgStdevMetadataFactory.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (c) 2018 Cisco and/or its affiliates. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""Module holding AvgStdevMetadataFactory class.""" - -import math - -from AvgStdevMetadata import AvgStdevMetadata - - -class AvgStdevMetadataFactory(object): - """Class factory which creates avg,stdev metadata from data.""" - - @staticmethod - def from_data(values): - """Return new metadata object fitting the values. - - :param values: Run values to be processed. - :type values: Iterable of float or of AvgStdevMetadata - :returns: The metadata matching the values. - :rtype: AvgStdevMetadata - """ - # Using Welford method to be more resistant to rounding errors. - # Adapted from code for sample standard deviation at: - # https://www.johndcook.com/blog/standard_deviation/ - # The logic of plus operator is taken from - # https://www.johndcook.com/blog/skewness_kurtosis/ - size = 0 - avg = 0.0 - moment_2 = 0.0 - for value in values: - if not isinstance(value, AvgStdevMetadata): - value = AvgStdevMetadata(size=1, avg=value) - old_size = size - delta = value.avg - avg - size += value.size - avg += delta * value.size / size - moment_2 += value.stdev * value.stdev * value.size - moment_2 += delta * delta * old_size * value.size / size - if size < 1: - return AvgStdevMetadata() - stdev = math.sqrt(moment_2 / size) - ret_obj = AvgStdevMetadata(size=size, avg=avg, stdev=stdev) - return ret_obj diff --git a/PyPI/jumpavg/jumpavg/BitCountingClassifier.py b/PyPI/jumpavg/jumpavg/BitCountingClassifier.py deleted file mode 100644 index 9a723199d2..0000000000 --- a/PyPI/jumpavg/jumpavg/BitCountingClassifier.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (c) 2018 Cisco and/or its affiliates. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Module holding BitCountingClassifier class. - -This is the main class to be used by callers.""" - -from AbstractGroupClassifier import AbstractGroupClassifier -from BitCountingGroup import BitCountingGroup -from BitCountingGroupList import BitCountingGroupList -from BitCountingMetadataFactory import BitCountingMetadataFactory -from ClassifiedMetadataFactory import ClassifiedMetadataFactory - - -class BitCountingClassifier(AbstractGroupClassifier): - """Classifier using Minimal Description Length principle.""" - - def classify(self, values): - """Return the values in groups of optimal bit count. - - The current implementation could be a static method, - but we might support options in later versions, - for example for chosing encodings. - - :param values: Sequence of runs to classify. - :type values: Iterable of float or of AvgStdevMetadata - :returns: Classified group list. 
- :rtype: BitCountingGroupList - """ - max_value = BitCountingMetadataFactory.find_max_value(values) - factory = BitCountingMetadataFactory(max_value) - opened_at = [] - closed_before = [BitCountingGroupList()] - for index, value in enumerate(values): - singleton = BitCountingGroup(factory, [value]) - newly_opened = closed_before[index].with_group_appended(singleton) - opened_at.append(newly_opened) - record_group_list = newly_opened - for previous in range(index): - previous_opened_list = opened_at[previous] - still_opened = ( - previous_opened_list.with_value_added_to_last_group(value)) - opened_at[previous] = still_opened - if still_opened.bits < record_group_list.bits: - record_group_list = still_opened - closed_before.append(record_group_list) - partition = closed_before[-1] - previous_average = partition[0].metadata.avg - for group in partition: - if group.metadata.avg == previous_average: - group.metadata = ClassifiedMetadataFactory.with_classification( - group.metadata, "normal") - elif group.metadata.avg < previous_average: - group.metadata = ClassifiedMetadataFactory.with_classification( - group.metadata, "regression") - elif group.metadata.avg > previous_average: - group.metadata = ClassifiedMetadataFactory.with_classification( - group.metadata, "progression") - previous_average = group.metadata.avg - return partition diff --git a/PyPI/jumpavg/jumpavg/BitCountingGroup.py b/PyPI/jumpavg/jumpavg/BitCountingGroup.py deleted file mode 100644 index 2071c061ea..0000000000 --- a/PyPI/jumpavg/jumpavg/BitCountingGroup.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright (c) 2018 Cisco and/or its affiliates. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Module holding BitCountingGroup class.""" - -from RunGroup import RunGroup - - -class BitCountingGroup(RunGroup): - """RunGroup with BitCountingMetadata. - - Support with_run_added() method to simplify extending the group. - As bit content has to be re-counted, metadata factory is stored. - """ - - def __init__(self, metadata_factory, values=[]): - """Create the group from metadata factory and values. - - :param metadata_factory: Factory object to create metadata with. - :param values: The runs belonging to this group. - :type metadata_factory: BitCountingMetadataFactory - :type values: Iterable of float or of AvgStdevMetadata - """ - self.metadata_factory = metadata_factory - metadata = metadata_factory.from_data(values) - super(BitCountingGroup, self).__init__(metadata, values) - - def with_run_added(self, value): - """Create and return a new group with one more run that self. - - :param value: The run value to add to the group. - :type value: float or od AvgStdevMetadata - :returns: New group with the run added. - :rtype: BitCountingGroup - """ - values = list(self.values) - values.append(value) - return BitCountingGroup(self.metadata_factory, values) - # TODO: Is there a good way to save some computation - # by copy&updating the metadata incrementally? diff --git a/PyPI/jumpavg/jumpavg/BitCountingGroupList.py b/PyPI/jumpavg/jumpavg/BitCountingGroupList.py deleted file mode 100644 index 1f69c0635d..0000000000 --- a/PyPI/jumpavg/jumpavg/BitCountingGroupList.py +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright (c) 2018 Cisco and/or its affiliates. 
-# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Module holding BitCountingGroupList class.""" - -from BitCountingGroup import BitCountingGroup -from BitCountingMetadataFactory import BitCountingMetadataFactory - - -class BitCountingGroupList(list): - """List of BitCountingGroup which tracks overall bit count. - - This is useful, as bit count of a subsequent group - depends on average of the previous group. - Having the logic encapsulated here spares the caller - the effort to pass averages around. - - Method with_value_added_to_last_group() delegates to BitCountingGroup, - with_group_appended() adds new group with recalculated bits. - - TODO: last_group.metadata_factory.max_value in with_group_appended() - is ugly, find a more natural class design. - """ - - def __init__(self, group_list=[], bits=None): - """Create a group list from given list of groups. - - :param group_list: List of groups to compose this group. - :param bits: Bit count if known, else None. - :type group_list: list of BitCountingGroup - :type bits: float or None - """ - super(BitCountingGroupList, self).__init__(group_list) - if bits is not None: - self.bits = bits - return - bits = 0.0 - for group in group_list: - bits += group.metadata.bits - self.bits = bits - - def with_group_appended(self, group): - """Create and return new group list with given group more than self. - - The group argument object is updated with derivative metadata. 
- - :param group: Next group to be appended to the group list. - :type group: BitCountingGroup - :returns: New group list with added group. - :rtype: BitCountingGroupList - """ - group_list = list(self) - if group_list: - last_group = group_list[-1] - factory = BitCountingMetadataFactory( - last_group.metadata_factory.max_value, last_group.metadata.avg) - group.metadata_factory = factory - group.metadata = factory.from_data(group.values) - group_list.append(group) - bits = self.bits + group.metadata.bits - return BitCountingGroupList(group_list, bits) - - def with_value_added_to_last_group(self, value): - """Create and return new group list with value added to last group. - - :param value: The run value to add to the last group. - :type value: float or od AvgStdevMetadata - :returns: New group list with the last group updated. - :rtype: BitCountingGroupList - """ - group_list = list(self) - last_group = group_list[-1] - bits_before = last_group.metadata.bits - last_group = last_group.with_run_added(value) - group_list[-1] = last_group - bits = self.bits - bits_before + last_group.metadata.bits - return BitCountingGroupList(group_list, bits) diff --git a/PyPI/jumpavg/jumpavg/BitCountingMetadata.py b/PyPI/jumpavg/jumpavg/BitCountingMetadata.py deleted file mode 100644 index d25d355cab..0000000000 --- a/PyPI/jumpavg/jumpavg/BitCountingMetadata.py +++ /dev/null @@ -1,109 +0,0 @@ -# Copyright (c) 2018 Cisco and/or its affiliates. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -"""Module holding BitCountingMetadata class.""" - -import math - -from AvgStdevMetadata import AvgStdevMetadata - - -class BitCountingMetadata(AvgStdevMetadata): - """Class for metadata which includes information content of a group. - - The information content is based on an assumption - that the data consists of independent random values - from a normal distribution. - """ - - def __init__(self, max_value, size=0, avg=0.0, stdev=0.0, prev_avg=None): - """Construct the metadata by computing from the values needed. - - The bit count is not real, as that would depend on numeric precision - (number of significant bits in values). - The difference is assumed to be constant per value, - which is consistent with Gauss distribution - (but not with floating point mechanic). - The hope is the difference will have - no real impact on the classification procedure. - - :param max_value: Maximal expected value. - TODO: This might be more optimal, - but max-invariant algorithm will be nicer. - :param size: Number of values participating in this group. - :param avg: Population average of the participating sample values. - :param stdev: Population standard deviation of the sample values. - :param prev_avg: Population average of the previous group. - If None, no previous average is taken into account. - If not None, the given previous average is used to discourage - consecutive groups with similar averages - (opposite triangle distribution is assumed). - :type max_value: float - :type size: int - :type avg: float - :type stdev: float - :type prev_avg: float or None - """ - super(BitCountingMetadata, self).__init__(size, avg, stdev) - self.max_value = max_value - self.prev_avg = prev_avg - self.bits = 0.0 - if self.size < 1: - return - # Length of the sequence must be also counted in bits, - # otherwise the message would not be decodable. 
- # Model: probability of k samples is 1/k - 1/(k+1) - # == 1/k/(k+1) - self.bits += math.log(size * (size + 1), 2) - if prev_avg is None: - # Avg is considered to be uniformly distributed - # from zero to max_value. - self.bits += math.log(max_value + 1.0, 2) - else: - # Opposite triangle distribution with minimum. - self.bits += math.log( - max_value * (max_value + 1) / (abs(avg - prev_avg) + 1), 2) - if self.size < 2: - return - # Stdev is considered to be uniformly distributed - # from zero to max_value. That is quite a bad expectation, - # but resilient to negative samples etc. - self.bits += math.log(max_value + 1.0, 2) - # Now we know the samples lie on sphere in size-1 dimensions. - # So it is (size-2)-sphere, with radius^2 == stdev^2 * size. - # https://en.wikipedia.org/wiki/N-sphere - sphere_area_ln = math.log(2) + math.log(math.pi) * ((size - 1) / 2.0) - sphere_area_ln -= math.lgamma((size - 1) / 2.0) - sphere_area_ln += math.log(stdev + 1.0) * (size - 2) - sphere_area_ln += math.log(size) * ((size - 2) / 2.0) - self.bits += sphere_area_ln / math.log(2) - - def __str__(self): - """Return string with human readable description of the group. - - :returns: Readable description. - :rtype: str - """ - return "size={size} avg={avg} stdev={stdev} bits={bits}".format( - size=self.size, avg=self.avg, stdev=self.stdev, bits=self.bits) - - def __repr__(self): - """Return string executable as Python constructor call. - - :returns: Executable constructor call. 
- :rtype: str - """ - return ("BitCountingMetadata(max_value={max_value},size={size}," + - "avg={avg},stdev={stdev},prev_avg={prev_avg})").format( - max_value=self.max_value, size=self.size, avg=self.avg, - stdev=self.stdev, prev_avg=self.prev_avg) diff --git a/PyPI/jumpavg/jumpavg/BitCountingMetadataFactory.py b/PyPI/jumpavg/jumpavg/BitCountingMetadataFactory.py deleted file mode 100644 index 567c3d4fe6..0000000000 --- a/PyPI/jumpavg/jumpavg/BitCountingMetadataFactory.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (c) 2018 Cisco and/or its affiliates. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Module holding BitCountingMetadataFactory class.""" - -import math - -from AvgStdevMetadata import AvgStdevMetadata -from AvgStdevMetadataFactory import AvgStdevMetadataFactory -from BitCountingMetadata import BitCountingMetadata - - -class BitCountingMetadataFactory(object): - """Class for factory which creates bit counting metadata from data. - - TODO: Summarize the methods? - """ - - @staticmethod - def find_max_value(values): - """Return the max value. - - This is a separate helper method, - because the whole set of values is usually larger than in from_data(). - - :param values: Run values to be processed. - :type values: Iterable of float - :returns: 0.0 or the biggest value found. 
- :rtype: float - """ - max_value = 0.0 - for value in values: - if isinstance(value, AvgStdevMetadata): - value = value.avg - if value > max_value: - max_value = value - return max_value - - def __init__(self, max_value, prev_avg=None): - """Construct the factory instance with given arguments. - - :param max_value: Maximal expected value. - :param prev_avg: Population average of the previous group. - If None, no previous average is taken into account. - If not None, the given previous average is used to discourage - consecutive groups with similar averages - (opposite triangle distribution is assumed). - :type max_value: float - :type prev_avg: float or None - """ - self.max_value = max_value - self.prev_avg = prev_avg - - def from_avg_stdev_metadata(self, metadata): - """Return new metadata object by adding bits to existing metadata. - - :param metadata: Metadata to count bits for. - :type metadata: AvgStdevMetadata - :returns: The metadata with bits counted. - :rtype: BitCountingMetadata - """ - return BitCountingMetadata( - max_value=self.max_value, size=metadata.size, - avg=metadata.avg, stdev=metadata.stdev, prev_avg=self.prev_avg) - - def from_data(self, values): - """Return new metadata object fitting the values. - - :param values: Run values to be processed. - :type values: Iterable of float or of AvgStdevMetadata - :returns: The metadata matching the values. - :rtype: BitCountingMetadata - """ - metadata = AvgStdevMetadataFactory.from_data(values) - return self.from_avg_stdev_metadata(metadata) diff --git a/PyPI/jumpavg/jumpavg/ClassifiedBitCountingMetadata.py b/PyPI/jumpavg/jumpavg/ClassifiedBitCountingMetadata.py deleted file mode 100644 index 29359f0908..0000000000 --- a/PyPI/jumpavg/jumpavg/ClassifiedBitCountingMetadata.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (c) 2018 Cisco and/or its affiliates. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Module holding ClassifiedBitCountingMetadata class.""" - -from BitCountingMetadata import BitCountingMetadata - - -class ClassifiedBitCountingMetadata(BitCountingMetadata): - """Class for metadata which includes classification. - - TODO: Can we create ClassifiedMetadata and inherit (also) from that? - """ - - def __init__( - self, max_value, size=0, avg=0.0, stdev=0.0, prev_avg=None, - classification=None): - """Delegate to ancestor constructors and set classification. - - :param max_value: Maximal expected value. - :param size: Number of values participating in this group. - :param avg: Population average of the participating sample values. - :param stdev: Population standard deviation of the sample values. - :param prev_avg: Population average of the previous group. - If None, no previous average is taken into account. - If not None, the given previous average is used to discourage - consecutive groups with similar averages - (opposite triangle distribution is assumed). - :param classification: Arbitrary object classifying this group. - :type max_value: float - :type size: int - :type avg: float - :type stdev: float - :type prev_avg: float - :type classification: object - """ - super(ClassifiedBitCountingMetadata, self).__init__( - max_value, size, avg, stdev, prev_avg) - self.classification = classification - - def __str__(self): - """Return string with human readable description of the group. - - :returns: Readable description. - :rtype: str - """ - # str(super(...)) describes the proxy, not the proxied object. 
- super_str = super(ClassifiedBitCountingMetadata, self).__str__() - return super_str + " classification={classification}".format( - classification=self.classification) - - def __repr__(self): - """Return string executable as Python constructor call. - - :returns: Executable constructor call. - :rtype: str - """ - return ("ClassifiedBitCountingMetadata(max_value={max_value}," + - "size={size},avg={avg},stdev={stdev},prev_avg={prev_avg}," + - "classification={cls})").format( - max_value=self.max_value, size=self.size, avg=self.avg, - stdev=self.stdev, prev_avg=self.prev_avg, - cls=self.classification) diff --git a/PyPI/jumpavg/jumpavg/ClassifiedMetadataFactory.py b/PyPI/jumpavg/jumpavg/ClassifiedMetadataFactory.py deleted file mode 100644 index 7fdea7c162..0000000000 --- a/PyPI/jumpavg/jumpavg/ClassifiedMetadataFactory.py +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright (c) 2018 Cisco and/or its affiliates. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Module holding ClassifiedBitCountingMetadata class.""" - -from ClassifiedBitCountingMetadata import ClassifiedBitCountingMetadata - - -class ClassifiedMetadataFactory(object): - """Class for factory which adds classification to bit counting metadata.""" - - @staticmethod - def with_classification(metadata, classification): - """Return new metadata object with added classification. - - TODO: Is there a way to add classification to any metadata, - without messing up constructors and __repr__()? 
- - FIXME: Factories take raw resources. Find a name for the thing - which takes semi-finished products. Transformer? - - :param metadata: Existing metadata without classification. - :param classification: Arbitrary object classifying this group. - :type metadata: BitCountingMetadata - :type classification: object - :returns: The metadata with added classification. - :rtype: ClassifiedBitCountingMetadata - """ - return ClassifiedBitCountingMetadata( - max_value=metadata.max_value, size=metadata.size, avg=metadata.avg, - stdev=metadata.stdev, prev_avg=metadata.prev_avg, - classification=classification) diff --git a/PyPI/jumpavg/jumpavg/RunGroup.py b/PyPI/jumpavg/jumpavg/RunGroup.py deleted file mode 100644 index 9de8ae8919..0000000000 --- a/PyPI/jumpavg/jumpavg/RunGroup.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (c) 2018 Cisco and/or its affiliates. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Module holding RunGroup class.""" - - -class RunGroup(object): - """Effectively a named touple of data and metadata. - - TODO: This feels like an abstract class. - Most uses assume restrictions on metadata type. - Can this be defined similarly to C++ templates? - """ - - def __init__(self, metadata, values): - """Create the group from metadata and values. - - :param metadata: Metadata object to associate with the group. - :param values: The runs belonging to this group. 
- :type metadata: AbstractGroupMetadata - :type values: Iterable of float or od AvgStdevMetadata - """ - self.metadata = metadata - self.values = values diff --git a/PyPI/jumpavg/setup.py b/PyPI/jumpavg/setup.py index 206373ac47..c80e38f069 100644 --- a/PyPI/jumpavg/setup.py +++ b/PyPI/jumpavg/setup.py @@ -1,40 +1,47 @@ +#!/usr/bin/env python3 + """A setup module for setuptools. See: https://packaging.python.org/en/latest/distributing.html """ -from setuptools import setup, find_packages +from setuptools import (setup, find_packages) from os import path from io import open here = path.abspath(path.dirname(__file__)) -with open(path.join(here, "README.rst"), encoding="utf-8") as f: +with open(path.join(here, u"README.rst"), encoding=u"utf-8") as f: long_description = f.read() setup( - name="jumpavg", - version="0.1.4", # This is currently the only place listing the version. - description="Library for finding changes in time series by grouping results.", + name=u"jumpavg", + version=u"0.2.0", # This is currently the only place listing the version. + description=( + u"Library for locating changes in time series by grouping results." + ), long_description=long_description, - long_description_content_type="text/x-rst", + long_description_content_type=u"text/x-rst", # TODO: Create a separate webpage for jumpavg library. - url="https://gerrit.fd.io/r/gitweb?p=csit.git;a=tree;f=PyPI/jumpavg;hb=refs/heads/master", - author="Cisco Systems Inc. and/or its affiliates", - author_email="csit-dev@lists.fd.io", + url=( + u"https://gerrit.fd.io/r/gitweb?p=csit.git;a=tree;f=PyPI/jumpavg" + u";hb=refs/heads/master" + ), + author=u"Cisco Systems Inc. 
and/or its affiliates", + author_email=u"csit-dev@lists.fd.io", classifiers=[ - "Development Status :: 3 - Alpha", - "Intended Audience :: Science/Research", + u"Development Status :: 3 - Alpha", + u"Intended Audience :: Science/Research", # Pick your license as you wish - "License :: OSI Approved :: Apache Software License", - "Natural Language :: English", + u"License :: OSI Approved :: Apache Software License", + u"Natural Language :: English", # TODO: Test which Python versions is the code compatible with. - "Programming Language :: Python :: 2.7", - "Topic :: Scientific/Engineering :: Information Analysis" + u"Programming Language :: Python :: 2.7", + u"Topic :: Scientific/Engineering :: Information Analysis" ], - keywords="progression regression anomaly detection", + keywords=u"progression regression anomaly detection statistics bits", packages=find_packages(exclude=[]), - # TODO: python_requires="~=2.7" + python_requires="~=3.6" install_requires=[], # TODO: Include simulator and tests. extras_require={ @@ -42,11 +49,14 @@ setup( package_data={ }, entry_points={ - "console_scripts": [ + u"console_scripts": [ ], }, project_urls={ - "Bug Reports": "https://jira.fd.io/projects/CSIT/issues", - "Source": "https://gerrit.fd.io/r/gitweb?p=csit.git;a=tree;f=PyPI/jumpavg;hb=refs/heads/master", + u"Bug Reports": u"https://jira.fd.io/projects/CSIT/issues", + u"Source": ( + u"https://gerrit.fd.io/r/gitweb?p=csit.git;a=tree;f=PyPI/jumpavg" + u";hb=refs/heads/master" + ), }, ) diff --git a/resources/libraries/bash/function/per_patch.sh b/resources/libraries/bash/function/per_patch.sh index 919789b5f4..ea7ea4f837 100644 --- a/resources/libraries/bash/function/per_patch.sh +++ b/resources/libraries/bash/function/per_patch.sh @@ -108,11 +108,12 @@ function compare_test_results () { set -exuo pipefail cd "${VPP_DIR}" || die "Change directory operation failed." - # Reusing CSIT main virtualenv. + # Ply is installed as system level package, but not seen for some reason. 
pip3 install -r "${PYTHON_SCRIPTS_DIR}/perpatch_requirements.txt" || { - die "Perpatch Python requirements installation failed." + die "Compare script Python requirements installation failed." } - python3 "${PYTHON_SCRIPTS_DIR}/compare_perpatch.py" + # Reusing CSIT main virtualenv. + python3 "${TOOLS_DIR}/integrated/compare_perpatch.py" # The exit code determines the vote result. } diff --git a/resources/libraries/python/jumpavg/AvgStdevStats.py b/resources/libraries/python/jumpavg/AvgStdevStats.py new file mode 100644 index 0000000000..9a8decd932 --- /dev/null +++ b/resources/libraries/python/jumpavg/AvgStdevStats.py @@ -0,0 +1,113 @@ +# Copyright (c) 2019 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Module holding AvgStdevStats class.""" + +import math + + +class AvgStdevStats: + """Class for statistics which include average and stdev of a group. + + Contrary to other stats types, adding values to the group + is computationally light without any caching. + + Instances are only statistics, the data itself is stored elsewhere. + """ + + def __init__(self, size=0, avg=0.0, stdev=0.0): + """Construct the stats object by storing the values needed. + + Each value has to be numeric. + The values are not sanitized depending on size, wrong initialization + can cause delayed math errors. + + :param size: Number of values participating in this group. + :param avg: Population average of the participating sample values. 
+ :param stdev: Population standard deviation of the sample values. + :type size: int + :type avg: float + :type stdev: float + """ + self.size = size + self.avg = avg + self.stdev = stdev + + def __str__(self): + """Return string with human readable description of the group. + + :returns: Readable description. + :rtype: str + """ + return f"size={self.size} avg={self.avg} stdev={self.stdev}" + + def __repr__(self): + """Return string executable as Python constructor call. + + :returns: Executable constructor call. + :rtype: str + """ + return ( + f"AvgStdevStats(size={self.size!r},avg={self.avg!r}" + f",stdev={self.stdev!r})" + ) + + @classmethod + def for_runs(cls, runs): + """Return new stats instance describing the sequence of runs. + + If you want to append data to existing stats object, + you can simply use the stats object as the first run. + + Instead of a verb, "for" is used to start this method name, + to signify the result contains less information than the input data. + + Here, Run is a hypothetical abstract class, an union of float and cls. + Defining that as a real abstract class in Python 2 is too much hassle. + + :param runs: Sequence of data to describe by the new metadata. + :type runs: Iterable[Union[float, cls]] + :returns: The new stats instance. + :rtype: cls + """ + # Using Welford method to be more resistant to rounding errors. 
+ # Adapted from code for sample standard deviation at: + # https://www.johndcook.com/blog/standard_deviation/ + # The logic of plus operator is taken from + # https://www.johndcook.com/blog/skewness_kurtosis/ + total_size = 0 + total_avg = 0.0 + moment_2 = 0.0 + for run in runs: + if isinstance(run, (float, int)): + run_size = 1 + run_avg = run + run_stdev = 0.0 + else: + run_size = run.size + run_avg = run.avg + run_stdev = run.stdev + old_total_size = total_size + delta = run_avg - total_avg + total_size += run_size + total_avg += delta * run_size / total_size + moment_2 += run_stdev * run_stdev * run_size + moment_2 += delta * delta * old_total_size * run_size / total_size + if total_size < 1: + # Avoid division by zero. + return cls(size=0) + # TODO: Is it worth tracking moment_2 instead, and compute and cache + # stdev on demand, just to possibly save some sqrt calls? + total_stdev = math.sqrt(moment_2 / total_size) + ret_obj = cls(size=total_size, avg=total_avg, stdev=total_stdev) + return ret_obj diff --git a/resources/libraries/python/jumpavg/BitCountingGroup.py b/resources/libraries/python/jumpavg/BitCountingGroup.py new file mode 100644 index 0000000000..0c1aabba30 --- /dev/null +++ b/resources/libraries/python/jumpavg/BitCountingGroup.py @@ -0,0 +1,173 @@ +# Copyright (c) 2019 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Module holding BitCountingGroup class.""" + +import copy + +from .AvgStdevStats import AvgStdevStats +from .BitCountingStats import BitCountingStats + + +class BitCountingGroup: + # TODO: Inherit from collections.abc.Sequence in Python 3. + """Group of runs which tracks bit count in an efficient manner. + + This class contains methods that mutate the internal state, + use copy() method to save the previous state. + + The Sequence-like access is related to the list of runs, + for example group[0] returns the first run in the list. + Writable list-like methods are not implemented. + + As the group bit count depends on previous average + and overall maximal value, those values are assumed + to be known beforehand (and immutable). + + As the caller is allowed to divide runs into groups in any way, + a method to add a single run in an efficient manner is provided. + """ + + def __init__(self, run_list=None, stats=None, bits=None, + max_value=None, prev_avg=None, comment="unknown"): + """Set the internal state and partially the stats. + + A "group" stands for an Iterable of runs, where "run" is either + a float value, or a stats-like object (only size, avg and stdev + are accessed). Run is a hypothetical abstract class, + defining it in Python 2 is too much hassle. + + Only a copy of the run list argument value is stored in the instance, + so it is not a problem if the value object is mutated afterwards. + + It is not verified whether the user provided values are valid, + e.g. whether the stats and bits values reflect the runs. + + :param run_list: List of run to compose into this group. Default: empty. + :param stats: Stats object used for computing bits. + :param bits: Cached value of information content. + :param max_value: Maximal sample value to be used for computing. + :param prev_avg: Average of the previous group, affects bits. + :param comment: Any string giving more info, e.g. "regression". 
+ :type run_list: Iterable[Run] + :type stats: Optional[AvgStdevStats] + :type bits: Optional[float] + :type max_value: float + :type prev_avg: Optional[float] + :type comment: str + """ + self.run_list = copy.deepcopy(run_list) if run_list else list() + self.stats = stats + self.cached_bits = bits + self.max_value = max_value + self.prev_avg = prev_avg + self.comment = comment + if self.stats is None: + self.stats = AvgStdevStats.for_runs(self.run_list) + + def __str__(self): + """Return string with human readable description of the group. + + :returns: Readable description. + :rtype: str + """ + return f"stats={self.stats} bits={self.cached_bits}" + + def __repr__(self): + """Return string executable as Python constructor call. + + :returns: Executable constructor call. + :rtype: str + """ + return ( + f"BitCountingGroup(run_list={self.run_list!r},stats={self.stats!r}" + f",bits={self.cached_bits!r},max_value={self.max_value!r}" + f",prev_avg={self.prev_avg!r},comment={self.comment!r})" + ) + + def __getitem__(self, index): + """Return the run at the index. + + :param index: Index of the run to return. + :type index: int + :returns: The run at the index. + :rtype: Run + """ + return self.run_list[index] + + def __len__(self): + """Return the number of runs in the group. + + :returns: The Length of run_list. + :rtype: int + """ + return len(self.run_list) + + def copy(self): + """Return a new instance with copied internal state. + + :returns: The copied instance. + :rtype: BitCountingGroup + """ + stats = AvgStdevStats.for_runs([self.stats]) + return self.__class__( + run_list=self.run_list, stats=stats, bits=self.cached_bits, + max_value=self.max_value, prev_avg=self.prev_avg, + comment=self.comment) + + @property + def bits(self): + """Return overall bit content of the group list. + + If not cached, compute from stats and cache. + + :returns: The overall information content in bits. 
+ :rtype: float + """ + if self.cached_bits is None: + self.cached_bits = BitCountingStats.for_runs( + [self.stats], self.max_value, self.prev_avg).bits + return self.cached_bits + + def append(self, run): + """Mutate to add the new run, return self. + + Stats are updated, but old bits value is deleted from cache. + + :param run: The run value to add to the group. + :type run: Run + :returns: The updated self. + :rtype: BitCountingGroup + """ + self.run_list.append(run) + self.stats = AvgStdevStats.for_runs([self.stats, run]) + self.cached_bits = None + return self + + def extend(self, runs): + """Mutate to add the new runs, return self. + + This saves a small amount of computation + compared to adding runs one by one in a loop. + + Stats are updated, but old bits value is deleted from cache. + + :param runs: The runs to add to the group. + :type runs: Iterable[Run] + :returns: The updated self. + :rtype: BitCountingGroup + """ + self.run_list.extend(runs) + self.stats = AvgStdevStats.for_runs([self.stats] + runs) + self.cached_bits = None + return self diff --git a/resources/libraries/python/jumpavg/BitCountingGroupList.py b/resources/libraries/python/jumpavg/BitCountingGroupList.py new file mode 100644 index 0000000000..bcc5e43267 --- /dev/null +++ b/resources/libraries/python/jumpavg/BitCountingGroupList.py @@ -0,0 +1,185 @@ +# Copyright (c) 2019 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +"""Module holding BitCountingGroupList class.""" + +import copy + +from .BitCountingGroup import BitCountingGroup + + +class BitCountingGroupList: + # TODO: Inherit from collections.abc.Sequence in Python 3. + """List of data groups which tracks overall bit count. + + The Sequence-like access is related to the list of groups, + for example group_list[0] returns the first group in the list. + Writable list-like methods are not implemented. + + The overall bit count is the sum of bit counts of each group. + Group is a sequence of data samples accompanied by their stats. + Different partitioning of data samples into the groups + results in different overall bit count. + This can be used to group samples in various contexts. + + As the group bit count depends on previous average + and overall maximal value, order of groups is important. + Having the logic encapsulated here spares the caller + the effort to pass averages around. + + The data can be only added, and there is some logic to skip + recalculations if the bit count is not needed. + """ + + def __init__(self, group_list=None, bits_except_last=0.0, max_value=None): + """Set the internal state without any calculations. + + The group list argument is copied deeply, so it is not a problem + if the value object is mutated afterwards. + + A "group" stands for an Iterable of runs, where "run" is either + a float value, or a stats-like object (only size, avg and stdev + are accessed). Run is a hypothetical abstract class, + defining it in Python 2 is too much hassle. + + It is not verified whether the user provided values are valid, + e.g. whether the cached bits values make sense. + + The max_value is required and immutable, + it is recommended the callers find their maximum beforehand. + + :param group_list: List of groups to compose this group list (or empty). + :param bits_except_last: Partial sum of all but one group bits. + :param max_value: Maximal sample value to base bits computation on. 
+ :type group_list: Iterable[BitCountingGroup] + :type bits_except_last: float + :type max_value: float + """ + self.group_list = copy.deepcopy(group_list) if group_list else list() + self.bits_except_last = bits_except_last + self.max_value = max_value + + def __str__(self): + """Return string with human readable description of the group list. + + :returns: Readable description. + :rtype: str + """ + return u"group_list={self.group_list} bits={self.bits}" + + def __repr__(self): + """Return string executable as Python constructor call. + + :returns: Executable constructor call. + :rtype: str + """ + return ( + f"BitCountingGroupList(group_list={self.group_list!r}" + f",bits_except_last={self.bits_except_last!r}" + f",max_value={self.max_value!r})" + ) + + def __getitem__(self, index): + """Return the group at the index. + + :param index: Index of the group to return. + :type index: int + :returns: The group at the index. + :rtype: BitCountingGroup + """ + return self.group_list[index] + + def __len__(self): + """Return the length of the group list. + + :returns: The Length of group_list. + :rtype: int + """ + return len(self.group_list) + + def copy(self): + """Return a new instance with copied internal state. + + :returns: The copied instance. + :rtype: BitCountingGroupList + """ + return self.__class__( + group_list=self.group_list, bits_except_last=self.bits_except_last, + max_value=self.max_value + ) + + @property + def bits(self): + """Return overall bit content of the group list. + + :returns: The overall information content in bits. + :rtype: float + """ + if not self.group_list: + return 0.0 + # TODO: Is it worth to cache the overall result? + return self.bits_except_last + self.group_list[-1].bits + + def append_group_of_runs(self, runs): + """Mutate to add a new group based on the runs, return self. + + The argument is copied before adding to the group list, + so further edits do not affect the group list.
+ The argument can also be a group, only runs from it are used. + + :param runs: Runs to form the next group to be appended to self. + :type runs: Union[Iterable[Run], BitCountingGroup] + :returns: The updated self. + :rtype: BitCountingGroupList + """ + prev_avg = self.group_list[-1].stats.avg if self.group_list else None + if isinstance(runs, BitCountingGroup): + # It is faster to avoid stats recalculation. + new_group = runs.copy() + new_group.max_value = self.max_value + new_group.prev_avg = prev_avg + new_group.cached_bits = None + else: + new_group = BitCountingGroup( + run_list=runs, max_value=self.max_value, prev_avg=prev_avg) + self.bits_except_last = self.bits + self.group_list.append(new_group) + return self + + def append_run_to_to_last_group(self, run): + """Mutate to add new run at the end of the last group. + + Basically a one-liner, only returning group list instead of last group. + + :param run: The run value to add to the last group. + :type run: Run + :returns: The updated self. + :rtype: BitCountingGroupList + :raises IndexError: If group list is empty, no last group to add to. + """ + self.group_list[-1].append(run) + return self + + def extend_runs_to_last_group(self, runs): + """Mutate to add new runs to the end of the last group. + + A faster alternative to appending runs one by one in a loop. + + :param runs: The runs to add to the last group. + :type runs: Iterable[Run] + :returns: The updated self + :rtype: BitCountingGroupList + :raises IndexError: If group list is empty, no last group to add to. + """ + self.group_list[-1].extend(runs) + return self diff --git a/resources/libraries/python/jumpavg/BitCountingStats.py b/resources/libraries/python/jumpavg/BitCountingStats.py new file mode 100644 index 0000000000..0addec013b --- /dev/null +++ b/resources/libraries/python/jumpavg/BitCountingStats.py @@ -0,0 +1,169 @@ +# Copyright (c) 2019 Cisco and/or its affiliates. 
+# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Module holding BitCountingStats class.""" + +import math + +from .AvgStdevStats import AvgStdevStats + + +class BitCountingStats(AvgStdevStats): + """Class for statistics which include information content of a group. + + The information content is based on an assumption that the data + consists of independent random values from a normal distribution. + + Instances are only statistics, the data itself is stored elsewhere. + + The coding needs to know the previous average, and a maximal value + so both values are required as inputs. + + This is a subclass of AvgStdevStats, even though all methods are overridden. + Only for_runs method calls the parent implementation, without using super(). + """ + + def __init__( + self, size=0, avg=None, stdev=0.0, max_value=None, prev_avg=None): + """Construct the stats object by computing from the values needed. + + The values are not sanitized, faulty callers can cause math errors. + + The None values are allowed for stats for zero size data, + but such stats can report arbitrary avg and max_value. + Stats for nonzero size data cannot contain None, + else ValueError is raised. + + The max_value needs to be numeric for nonzero size, + but its relations to avg and prev_avg are not examined. + + The bit count is not real, as that would depend on numeric precision + (number of significant bits in values).
+ The difference is assumed to be constant per value, + which is consistent with Gauss distribution + (but not with floating point mechanic). + The hope is the difference will have + no real impact on the classification procedure. + + :param size: Number of values participating in this group. + :param avg: Population average of the participating sample values. + :param stdev: Population standard deviation of the sample values. + :param max_value: Maximal expected value. + TODO: This might be more optimal, + but max-invariant algorithm will be nicer. + :param prev_avg: Population average of the previous group. + If None, no previous average is taken into account. + If not None, the given previous average is used to discourage + consecutive groups with similar averages + (opposite triangle distribution is assumed). + :type avg: float + :type size: int + :type stdev: float + :type max_value: Union[float, NoneType] + :type prev_avg: Union[float, NoneType] + """ + self.avg = avg + self.size = size + self.stdev = stdev + self.max_value = max_value + self.prev_avg = prev_avg + # Zero size should in principle have non-zero bits (coding zero size), + # but zero allows users to add empty groups without affecting bits. + self.bits = 0.0 + if self.size < 1: + return + if avg is None: + raise ValueError(f"Avg is None: {self!r}") + if max_value is None or max_value <= 0.0: + raise ValueError(f"Invalid max value: {self!r}") + # Length of the sequence must be also counted in bits, + # otherwise the message would not be decodable. + # Model: probability of k samples is 1/k - 1/(k+1) == 1/k/(k+1) + # This is compatible with zero size leading to zero bits. + self.bits += math.log(size * (size + 1), 2) + if prev_avg is None: + # Avg is considered to be uniformly distributed + # from zero to max_value. + self.bits += math.log(max_value + 1.0, 2) + else: + # Opposite triangle distribution with minimum. 
+ self.bits += math.log( + max_value * (max_value + 1) / (abs(avg - prev_avg) + 1), 2) + if self.size < 2: + return + # Stdev is considered to be uniformly distributed + # from zero to max_value. That is quite a bad expectation, + # but resilient to negative samples etc. + self.bits += math.log(max_value + 1.0, 2) + # Now we know the samples lie on sphere in size-1 dimensions. + # So it is (size-2)-sphere, with radius^2 == stdev^2 * size. + # https://en.wikipedia.org/wiki/N-sphere + sphere_area_ln = math.log(2) + math.log(math.pi) * ((size - 1) / 2.0) + sphere_area_ln -= math.lgamma((size - 1) / 2.0) + sphere_area_ln += math.log(stdev + 1.0) * (size - 2) + sphere_area_ln += math.log(size) * ((size - 2) / 2.0) + self.bits += sphere_area_ln / math.log(2) + + def __str__(self): + """Return string with human readable description of the group. + + :returns: Readable description. + :rtype: str + """ + return ( + f"size={self.size} avg={self.avg} stdev={self.stdev}" + f" bits={self.bits}" + ) + + def __repr__(self): + """Return string executable as Python constructor call. + + :returns: Executable constructor call. + :rtype: str + """ + return ( + f"BitCountingStats(size={self.size!r},avg={self.avg!r}" + f",stdev={self.stdev!r},max_value={self.max_value!r}" + f",prev_avg={self.prev_avg!r})" + ) + + @classmethod + def for_runs(cls, runs, max_value=None, prev_avg=None): + """Return new stats instance describing the sequence of runs. + + If you want to append data to existing stats object, + you can simply use the stats object as the first run. + + Instead of a verb, "for" is used to start this method name, + to signify the result contains less information than the input data. + + The two optional values can come from outside of the runs provided. + + The max_value cannot be None for non-zero size data. + The implementation does not check if no datapoint exceeds max_value. + + TODO: Document the behavior for zero size result. 
+ + :param runs: Sequence of data to describe by the new metadata. + :param max_value: Maximal expected value. + :param prev_avg: Population average of the previous group, if any. + :type runs: Iterable[Union[float, AvgStdevStats]] + :type max_value: Union[float, NoneType] + :type prev_avg: Union[float, NoneType] + :returns: The new stats instance. + :rtype: cls + """ + asd = AvgStdevStats.for_runs(runs) + ret_obj = cls(size=asd.size, avg=asd.avg, stdev=asd.stdev, + max_value=max_value, prev_avg=prev_avg) + return ret_obj diff --git a/PyPI/jumpavg/jumpavg/__init__.py b/resources/libraries/python/jumpavg/__init__.py similarity index 68% rename from PyPI/jumpavg/jumpavg/__init__.py rename to resources/libraries/python/jumpavg/__init__.py index 8e41ed9fe2..cb8b3df43d 100644 --- a/PyPI/jumpavg/jumpavg/__init__.py +++ b/resources/libraries/python/jumpavg/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018 Cisco and/or its affiliates. +# Copyright (c) 2019 Cisco and/or its affiliates. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: @@ -14,3 +14,9 @@ """ __init__ file for "jumpavg" Python package. """ + +from .AvgStdevStats import AvgStdevStats +from .BitCountingStats import BitCountingStats +from .BitCountingGroup import BitCountingGroup +from .BitCountingGroupList import BitCountingGroupList +from .classify import classify diff --git a/resources/libraries/python/jumpavg/classify.py b/resources/libraries/python/jumpavg/classify.py new file mode 100644 index 0000000000..5f5ce6160c --- /dev/null +++ b/resources/libraries/python/jumpavg/classify.py @@ -0,0 +1,76 @@ +# Copyright (c) 2019 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Module holding the classify function. + +Classification is one of the primary purposes of this package. + +Minimal message length principle is used +for grouping results into the list of groups, +assuming each group is a population of different Gaussian distribution. +""" + +from .AvgStdevStats import AvgStdevStats +from .BitCountingGroupList import BitCountingGroupList + + +def classify(values): + """Return the values in groups of optimal bit count. + + Here, a value is either a float, or an iterable of floats. + Such iterables represent an indivisible sequence of floats. + + Internally, such sequence is replaced by AvgStdevStats + after maximal value is found. + + :param values: Sequence of runs to classify. + :type values: Iterable[Union[float, Iterable[float]]] + :returns: Classified group list.
+ :rtype: BitCountingGroupList + """ + processed_values = list() + max_value = 0.0 + for value in values: + if isinstance(value, (float, int)): + if value > max_value: + max_value = value + processed_values.append(value) + else: + for subvalue in value: + if subvalue > max_value: + max_value = subvalue + processed_values.append(AvgStdevStats.for_runs(value)) + open_at = list() + closed_before = [BitCountingGroupList(max_value=max_value)] + for index, value in enumerate(processed_values): + newly_open = closed_before[index].copy() + newly_open.append_group_of_runs([value]) + open_at.append(newly_open) + record_group_list = newly_open + for previous_index, old_open in enumerate(open_at[:index]): + new_open = old_open.copy().append_run_to_to_last_group(value) + open_at[previous_index] = new_open + if new_open.bits < record_group_list.bits: + record_group_list = new_open + closed_before.append(record_group_list) + partition = closed_before[-1] + previous_average = partition[0].stats.avg + for group in partition: + if group.stats.avg == previous_average: + group.comment = u"normal" + elif group.stats.avg < previous_average: + group.comment = u"regression" + elif group.stats.avg > previous_average: + group.comment = u"progression" + previous_average = group.stats.avg + return partition diff --git a/resources/tools/doc_gen/.gitignore b/resources/tools/doc_gen/.gitignore index beb0d8f0cf..02254b9e4c 100644 --- a/resources/tools/doc_gen/.gitignore +++ b/resources/tools/doc_gen/.gitignore @@ -1,2 +1,3 @@ _build/** **/*tar.gz +/tmp diff --git a/resources/tools/scripts/compare_perpatch.py b/resources/tools/integrated/compare_perpatch.py similarity index 73% rename from resources/tools/scripts/compare_perpatch.py rename to resources/tools/integrated/compare_perpatch.py index c2c165c76d..dd15490506 100644 --- a/resources/tools/scripts/compare_perpatch.py +++ b/resources/tools/integrated/compare_perpatch.py @@ -25,8 +25,7 @@ this script votes -1 (by exiting with code 1), 
otherwise it votes +1 (exit 0). import json import sys -from jumpavg.BitCountingMetadataFactory import BitCountingMetadataFactory -from jumpavg.BitCountingClassifier import BitCountingClassifier +from resources.libraries.python import jumpavg def hack(value_list): @@ -75,10 +74,8 @@ while 1: sys.exit(1) parent_iterations.append(parent_lines) current_iterations.append(current_lines) -classifier = BitCountingClassifier() exit_code = 0 for test_index in range(num_tests): - val_max = 1.0 parent_values = list() current_values = list() for iteration_index in range(len(parent_iterations)): @@ -92,35 +89,42 @@ for test_index in range(num_tests): print(f"Time-ordered MRR values for current build: {current_values}") parent_values = hack(parent_values) current_values = hack(current_values) - parent_max = BitCountingMetadataFactory.find_max_value(parent_values) - current_max = BitCountingMetadataFactory.find_max_value(current_values) - val_max = max(val_max, parent_max, current_max) - factory = BitCountingMetadataFactory(val_max) - parent_stats = factory.from_data(parent_values) - current_factory = BitCountingMetadataFactory(val_max, parent_stats.avg) - current_stats = current_factory.from_data(current_values) - both_stats = factory.from_data(parent_values + current_values) + max_value = max([1.0] + parent_values + current_values) + parent_stats = jumpavg.AvgStdevStats.for_runs(parent_values) + current_stats = jumpavg.AvgStdevStats.for_runs(current_values) + parent_group_list = jumpavg.BitCountingGroupList( + max_value=max_value).append_group_of_runs([parent_stats]) + combined_group_list = parent_group_list.copy().extend_runs_to_last_group( + [current_stats]) + separated_group_list = parent_group_list.append_group_of_runs( + [current_stats]) print(f"Value-ordered MRR values for parent build: {parent_values}") print(f"Value-ordered MRR values for current build: {current_values}") - difference = (current_stats.avg - parent_stats.avg) / parent_stats.avg - print(f"Difference of 
averages relative to parent: {100 * difference}%") + avg_diff = (current_stats.avg - parent_stats.avg) / parent_stats.avg + print(f"Difference of averages relative to parent: {100 * avg_diff}%") print(f"Jumpavg representation of parent group: {parent_stats}") print(f"Jumpavg representation of current group: {current_stats}") - print(f"Jumpavg representation of both as one group: {both_stats}") - bits = parent_stats.bits + current_stats.bits - both_stats.bits - compared = u"longer" if bits >= 0 else u"shorter" print( - f"Separate groups are {compared} than single group by {abs(bits)} bits" + f"Jumpavg representation of both as one group:" + f" {combined_group_list[0].stats}" ) - classified_list = classifier.classify([parent_stats, current_stats]) + bits_diff = separated_group_list.bits - combined_group_list.bits + compared = u"longer" if bits_diff >= 0 else u"shorter" + print( + f"Separate groups are {compared} than single group" + f" by {abs(bits_diff)} bits" + ) + # TODO: Version of classify that takes max_value and list of stats? + # That matters if only stats (not list of floats) are given. 
+ classified_list = jumpavg.classify([parent_values, current_values]) if len(classified_list) < 2: print(f"Test test_index {test_index}: normal (no anomaly)") continue - anomaly = classified_list[1].metadata.classification + anomaly = classified_list[1].comment if anomaly == u"regression": print(f"Test test_index {test_index}: anomaly regression") - exit_code = 1 + exit_code = 3 # 1 or 2 can be caused by other errors continue print(f"Test test_index {test_index}: anomaly {anomaly}") -print(f"Exit code {exit_code}") +print(f"Exit code: {exit_code}") sys.exit(exit_code) diff --git a/resources/tools/scripts/perpatch_requirements.txt b/resources/tools/scripts/perpatch_requirements.txt index 3fca076af2..2275fb82f2 100644 --- a/resources/tools/scripts/perpatch_requirements.txt +++ b/resources/tools/scripts/perpatch_requirements.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2018 Cisco and/or its affiliates. +# Copyright (c) 2019 Cisco and/or its affiliates. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: @@ -11,6 +11,5 @@ # See the License for the specific language governing permissions and # limitations under the License. -# TODO: Convert to use the code from cloned CSIT git, not from pip. -jumpavg==0.1.3 +# TODO: Ply should be installed as system package, fix bootstrap to see it. ply==3.11 -- 2.16.6