From a2a0ab1cdec3567dcad46c2000337707777aa0ca Mon Sep 17 00:00:00 2001 From: Vratko Polak Date: Wed, 13 Jun 2018 13:04:01 +0200 Subject: [PATCH] CSIT-1110: Prepare for migrating the new detection + Do not declare BitCountingClassifier.classify() as class method. + Make BitCountingGroupList subclass of list. + Inherit from abstract classes whenever possible. + Drop unneeded imports. + Add module docstrings and class docstrings anywhere. + Add TODOs hinting at possible improvements. Change-Id: Iccfff5c0e7be0607d6cfa74314083fcfe5a4d7d9 Signed-off-by: Vratko Polak --- .../new/jumpavg/AbstractGroupClassifier.py | 7 +++++ .../new/jumpavg/AbstractGroupMetadata.py | 5 ++++ .../presentation/new/jumpavg/AvgStdevMetadata.py | 6 +++- .../new/jumpavg/AvgStdevMetadataFactory.py | 2 ++ .../new/jumpavg/BitCountingClassifier.py | 19 ++++++++---- .../presentation/new/jumpavg/BitCountingGroup.py | 7 +++++ .../new/jumpavg/BitCountingGroupList.py | 35 ++++++++++++---------- .../new/jumpavg/BitCountingMetadata.py | 9 +++++- .../new/jumpavg/BitCountingMetadataFactory.py | 7 ++++- .../new/jumpavg/ClassifiedBitCountingMetadata.py | 7 ++++- .../new/jumpavg/ClassifiedMetadataFactory.py | 2 +- .../tools/presentation/new/jumpavg/RunGroup.py | 8 +++++ resources/tools/presentation/new/utils.py | 4 +-- 13 files changed, 90 insertions(+), 28 deletions(-) diff --git a/resources/tools/presentation/new/jumpavg/AbstractGroupClassifier.py b/resources/tools/presentation/new/jumpavg/AbstractGroupClassifier.py index 26db758ea8..2612b009da 100644 --- a/resources/tools/presentation/new/jumpavg/AbstractGroupClassifier.py +++ b/resources/tools/presentation/new/jumpavg/AbstractGroupClassifier.py @@ -11,10 +11,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Module holding AbstractGroupClassifier class.""" + from abc import ABCMeta, abstractmethod class AbstractGroupClassifier(object): + """Abstract class defining API for classifier. + + The classifier is an object with classify() method + which divides data into groups containing metadata. + """ __metaclass__ = ABCMeta diff --git a/resources/tools/presentation/new/jumpavg/AbstractGroupMetadata.py b/resources/tools/presentation/new/jumpavg/AbstractGroupMetadata.py index 6084db5a1a..3235dbd485 100644 --- a/resources/tools/presentation/new/jumpavg/AbstractGroupMetadata.py +++ b/resources/tools/presentation/new/jumpavg/AbstractGroupMetadata.py @@ -11,10 +11,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Module holding AbstractGroupMetadata class.""" + from abc import ABCMeta, abstractmethod class AbstractGroupMetadata(object): + """Abstract classdefining API for metadata. + + At this level, only __str__() and __repr() methods are required.""" __metaclass__ = ABCMeta diff --git a/resources/tools/presentation/new/jumpavg/AvgStdevMetadata.py b/resources/tools/presentation/new/jumpavg/AvgStdevMetadata.py index bd7eca1824..efc1a90cd4 100644 --- a/resources/tools/presentation/new/jumpavg/AvgStdevMetadata.py +++ b/resources/tools/presentation/new/jumpavg/AvgStdevMetadata.py @@ -11,8 +11,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Module for holding AvgStdevMetadata class.""" -class AvgStdevMetadata(object): +from AbstractGroupMetadata import AbstractGroupMetadata + + +class AvgStdevMetadata(AbstractGroupMetadata): """Class for metadata specifying the average and standard deviation.""" def __init__(self, size=0, avg=0.0, stdev=0.0): diff --git a/resources/tools/presentation/new/jumpavg/AvgStdevMetadataFactory.py b/resources/tools/presentation/new/jumpavg/AvgStdevMetadataFactory.py index d7d0517a57..6d2e967a88 100644 --- a/resources/tools/presentation/new/jumpavg/AvgStdevMetadataFactory.py +++ b/resources/tools/presentation/new/jumpavg/AvgStdevMetadataFactory.py @@ -11,6 +11,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Module holding AvgStdevMetadataFactory class.""" + import math from AvgStdevMetadata import AvgStdevMetadata diff --git a/resources/tools/presentation/new/jumpavg/BitCountingClassifier.py b/resources/tools/presentation/new/jumpavg/BitCountingClassifier.py index 69b1d65bb2..9a723199d2 100644 --- a/resources/tools/presentation/new/jumpavg/BitCountingClassifier.py +++ b/resources/tools/presentation/new/jumpavg/BitCountingClassifier.py @@ -11,24 +11,31 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Module holding BitCountingClassifier class. + +This is the main class to be used by callers.""" + +from AbstractGroupClassifier import AbstractGroupClassifier from BitCountingGroup import BitCountingGroup from BitCountingGroupList import BitCountingGroupList from BitCountingMetadataFactory import BitCountingMetadataFactory from ClassifiedMetadataFactory import ClassifiedMetadataFactory -class BitCountingClassifier(object): +class BitCountingClassifier(AbstractGroupClassifier): + """Classifier using Minimal Description Length principle.""" - @staticmethod - def classify(values): + def classify(self, values): """Return the values in groups of optimal bit count. - TODO: Could we return BitCountingGroupList and let caller process it? + The current implementation could be a static method, + but we might support options in later versions, + for example for chosing encodings. :param values: Sequence of runs to classify. :type values: Iterable of float or of AvgStdevMetadata :returns: Classified group list. - :rtype: list of BitCountingGroup + :rtype: BitCountingGroupList """ max_value = BitCountingMetadataFactory.find_max_value(values) factory = BitCountingMetadataFactory(max_value) @@ -60,4 +67,4 @@ class BitCountingClassifier(object): group.metadata = ClassifiedMetadataFactory.with_classification( group.metadata, "progression") previous_average = group.metadata.avg - return partition.group_list + return partition diff --git a/resources/tools/presentation/new/jumpavg/BitCountingGroup.py b/resources/tools/presentation/new/jumpavg/BitCountingGroup.py index 144f5a8780..2071c061ea 100644 --- a/resources/tools/presentation/new/jumpavg/BitCountingGroup.py +++ b/resources/tools/presentation/new/jumpavg/BitCountingGroup.py @@ -11,10 +11,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Module holding BitCountingGroup class.""" + from RunGroup import RunGroup class BitCountingGroup(RunGroup): + """RunGroup with BitCountingMetadata. + + Support with_run_added() method to simplify extending the group. + As bit content has to be re-counted, metadata factory is stored. + """ def __init__(self, metadata_factory, values=[]): """Create the group from metadata factory and values. diff --git a/resources/tools/presentation/new/jumpavg/BitCountingGroupList.py b/resources/tools/presentation/new/jumpavg/BitCountingGroupList.py index 7da0656782..1f69c0635d 100644 --- a/resources/tools/presentation/new/jumpavg/BitCountingGroupList.py +++ b/resources/tools/presentation/new/jumpavg/BitCountingGroupList.py @@ -11,11 +11,26 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Module holding BitCountingGroupList class.""" + from BitCountingGroup import BitCountingGroup from BitCountingMetadataFactory import BitCountingMetadataFactory -class BitCountingGroupList(object): +class BitCountingGroupList(list): + """List of BitCountingGroup which tracks overall bit count. + + This is useful, as bit count of a subsequent group + depends on average of the previous group. + Having the logic encapsulated here spares the caller + the effort to pass averages around. + + Method with_value_added_to_last_group() delegates to BitCountingGroup, + with_group_appended() adds new group with recalculated bits. + + TODO: last_group.metadata_factory.max_value in with_group_appended() + is ugly, find a more natural class design. + """ def __init__(self, group_list=[], bits=None): """Create a group list from given list of groups. @@ -25,7 +40,7 @@ class BitCountingGroupList(object): :type group_list: list of BitCountingGroup :type bits: float or None """ - self.group_list = group_list + super(BitCountingGroupList, self).__init__(group_list) if bits is not None: self.bits = bits return @@ -34,16 +49,6 @@ class BitCountingGroupList(object): bits += group.metadata.bits self.bits = bits - def __getitem__(self, index): - """Return group at the index. This makes self iterable. - - :param index: The position in the array of groups. - :type index: int - :returns: Group at the position. - :rtype: BitCountingGroup - """ - return self.group_list[index] - def with_group_appended(self, group): """Create and return new group list with given group more than self. @@ -54,7 +59,7 @@ class BitCountingGroupList(object): :returns: New group list with added group. :rtype: BitCountingGroupList """ - group_list = list(self.group_list) + group_list = list(self) if group_list: last_group = group_list[-1] factory = BitCountingMetadataFactory( @@ -73,10 +78,10 @@ class BitCountingGroupList(object): :returns: New group list with the last group updated. :rtype: BitCountingGroupList """ - last_group = self.group_list[-1] + group_list = list(self) + last_group = group_list[-1] bits_before = last_group.metadata.bits last_group = last_group.with_run_added(value) - group_list = list(self.group_list) group_list[-1] = last_group bits = self.bits - bits_before + last_group.metadata.bits return BitCountingGroupList(group_list, bits) diff --git a/resources/tools/presentation/new/jumpavg/BitCountingMetadata.py b/resources/tools/presentation/new/jumpavg/BitCountingMetadata.py index 67d111985f..d25d355cab 100644 --- a/resources/tools/presentation/new/jumpavg/BitCountingMetadata.py +++ b/resources/tools/presentation/new/jumpavg/BitCountingMetadata.py @@ -11,13 +11,20 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Module holding BitCountingMetadata class.""" + import math from AvgStdevMetadata import AvgStdevMetadata class BitCountingMetadata(AvgStdevMetadata): - """Class for metadata which includes information content.""" + """Class for metadata which includes information content of a group. + + The information content is based on an assumption + that the data consists of independent random values + from a normal distribution. + """ def __init__(self, max_value, size=0, avg=0.0, stdev=0.0, prev_avg=None): """Construct the metadata by computing from the values needed. diff --git a/resources/tools/presentation/new/jumpavg/BitCountingMetadataFactory.py b/resources/tools/presentation/new/jumpavg/BitCountingMetadataFactory.py index 5a7b393b55..567c3d4fe6 100644 --- a/resources/tools/presentation/new/jumpavg/BitCountingMetadataFactory.py +++ b/resources/tools/presentation/new/jumpavg/BitCountingMetadataFactory.py @@ -11,6 +11,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Module holding BitCountingMetadataFactory class.""" + import math from AvgStdevMetadata import AvgStdevMetadata @@ -19,7 +21,10 @@ from BitCountingMetadata import BitCountingMetadata class BitCountingMetadataFactory(object): - """Class for factory which creates bit counting metadata from data.""" + """Class for factory which creates bit counting metadata from data. + + TODO: Summarize the methods? + """ @staticmethod def find_max_value(values): diff --git a/resources/tools/presentation/new/jumpavg/ClassifiedBitCountingMetadata.py b/resources/tools/presentation/new/jumpavg/ClassifiedBitCountingMetadata.py index 9a7277bc3e..29359f0908 100644 --- a/resources/tools/presentation/new/jumpavg/ClassifiedBitCountingMetadata.py +++ b/resources/tools/presentation/new/jumpavg/ClassifiedBitCountingMetadata.py @@ -11,11 +11,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Module holding ClassifiedBitCountingMetadata class.""" + from BitCountingMetadata import BitCountingMetadata class ClassifiedBitCountingMetadata(BitCountingMetadata): - """Class for metadata which includes classification.""" + """Class for metadata which includes classification. + + TODO: Can we create ClassifiedMetadata and inherit (also) from that? + """ def __init__( self, max_value, size=0, avg=0.0, stdev=0.0, prev_avg=None, diff --git a/resources/tools/presentation/new/jumpavg/ClassifiedMetadataFactory.py b/resources/tools/presentation/new/jumpavg/ClassifiedMetadataFactory.py index 39b157f26b..7fdea7c162 100644 --- a/resources/tools/presentation/new/jumpavg/ClassifiedMetadataFactory.py +++ b/resources/tools/presentation/new/jumpavg/ClassifiedMetadataFactory.py @@ -11,7 +11,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import math +"""Module holding ClassifiedBitCountingMetadata class.""" from ClassifiedBitCountingMetadata import ClassifiedBitCountingMetadata diff --git a/resources/tools/presentation/new/jumpavg/RunGroup.py b/resources/tools/presentation/new/jumpavg/RunGroup.py index 808e02b792..9de8ae8919 100644 --- a/resources/tools/presentation/new/jumpavg/RunGroup.py +++ b/resources/tools/presentation/new/jumpavg/RunGroup.py @@ -11,8 +11,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Module holding RunGroup class.""" + class RunGroup(object): + """Effectively a named touple of data and metadata. + + TODO: This feels like an abstract class. + Most uses assume restrictions on metadata type. + Can this be defined similarly to C++ templates? + """ def __init__(self, metadata, values): """Create the group from metadata and values. diff --git a/resources/tools/presentation/new/utils.py b/resources/tools/presentation/new/utils.py index a688928cda..a4e24663b5 100644 --- a/resources/tools/presentation/new/utils.py +++ b/resources/tools/presentation/new/utils.py @@ -225,7 +225,7 @@ def classify_anomalies(data): bare_data = [0.0 if np.isnan(sample) else sample for _, sample in data.iteritems()] # TODO: Put analogous iterator into jumpavg library. - groups = BitCountingClassifier.classify(bare_data) + groups = BitCountingClassifier().classify(bare_data) groups.reverse() # Just to use .pop() for FIFO. classification = [] avgs = [] @@ -239,7 +239,7 @@ def classify_anomalies(data): continue if values_left < 1 or active_group is None: values_left = 0 - while values_left < 1: # To ignore empty groups. + while values_left < 1: # Ignore empty groups (should not happen). active_group = groups.pop() values_left = len(active_group.values) avg = active_group.metadata.avg -- 2.16.6