+ Do not declare BitCountingClassifier.classify() as class method.
+ Make BitCountingGroupList subclass of list.
+ Inherit from abstract classes whenever possible.
+ Drop unneeded imports.
+ Add module docstrings and class docstrings anywhere.
+ Add TODOs hinting at possible improvements.
Change-Id: Iccfff5c0e7be0607d6cfa74314083fcfe5a4d7d9
Signed-off-by: Vratko Polak <vrpolak@cisco.com>
# See the License for the specific language governing permissions and
# limitations under the License.
+"""Module holding AbstractGroupClassifier class."""
+
from abc import ABCMeta, abstractmethod
class AbstractGroupClassifier(object):
+ """Abstract class defining API for classifier.
+
+ The classifier is an object with classify() method
+ which divides data into groups containing metadata.
+ """
__metaclass__ = ABCMeta
# See the License for the specific language governing permissions and
# limitations under the License.
+"""Module holding AbstractGroupMetadata class."""
+
from abc import ABCMeta, abstractmethod
class AbstractGroupMetadata(object):
+ """Abstract classdefining API for metadata.
+
+ At this level, only __str__() and __repr() methods are required."""
__metaclass__ = ABCMeta
# See the License for the specific language governing permissions and
# limitations under the License.
+"""Module for holding AvgStdevMetadata class."""
-class AvgStdevMetadata(object):
+from AbstractGroupMetadata import AbstractGroupMetadata
+
+
+class AvgStdevMetadata(AbstractGroupMetadata):
"""Class for metadata specifying the average and standard deviation."""
def __init__(self, size=0, avg=0.0, stdev=0.0):
# See the License for the specific language governing permissions and
# limitations under the License.
+"""Module holding AvgStdevMetadataFactory class."""
+
import math
from AvgStdevMetadata import AvgStdevMetadata
# See the License for the specific language governing permissions and
# limitations under the License.
+"""Module holding BitCountingClassifier class.
+
+This is the main class to be used by callers."""
+
+from AbstractGroupClassifier import AbstractGroupClassifier
from BitCountingGroup import BitCountingGroup
from BitCountingGroupList import BitCountingGroupList
from BitCountingMetadataFactory import BitCountingMetadataFactory
from ClassifiedMetadataFactory import ClassifiedMetadataFactory
-class BitCountingClassifier(object):
+class BitCountingClassifier(AbstractGroupClassifier):
+ """Classifier using Minimal Description Length principle."""
- @staticmethod
- def classify(values):
+ def classify(self, values):
"""Return the values in groups of optimal bit count.
- TODO: Could we return BitCountingGroupList and let caller process it?
+ The current implementation could be a static method,
+ but we might support options in later versions,
+ for example for chosing encodings.
:param values: Sequence of runs to classify.
:type values: Iterable of float or of AvgStdevMetadata
:returns: Classified group list.
- :rtype: list of BitCountingGroup
+ :rtype: BitCountingGroupList
"""
max_value = BitCountingMetadataFactory.find_max_value(values)
factory = BitCountingMetadataFactory(max_value)
group.metadata = ClassifiedMetadataFactory.with_classification(
group.metadata, "progression")
previous_average = group.metadata.avg
- return partition.group_list
+ return partition
# See the License for the specific language governing permissions and
# limitations under the License.
+"""Module holding BitCountingGroup class."""
+
from RunGroup import RunGroup
class BitCountingGroup(RunGroup):
+ """RunGroup with BitCountingMetadata.
+
+ Support with_run_added() method to simplify extending the group.
+ As bit content has to be re-counted, metadata factory is stored.
+ """
def __init__(self, metadata_factory, values=[]):
"""Create the group from metadata factory and values.
# See the License for the specific language governing permissions and
# limitations under the License.
+"""Module holding BitCountingGroupList class."""
+
from BitCountingGroup import BitCountingGroup
from BitCountingMetadataFactory import BitCountingMetadataFactory
-class BitCountingGroupList(object):
+class BitCountingGroupList(list):
+ """List of BitCountingGroup which tracks overall bit count.
+
+ This is useful, as bit count of a subsequent group
+ depends on average of the previous group.
+ Having the logic encapsulated here spares the caller
+ the effort to pass averages around.
+
+ Method with_value_added_to_last_group() delegates to BitCountingGroup,
+ with_group_appended() adds new group with recalculated bits.
+
+ TODO: last_group.metadata_factory.max_value in with_group_appended()
+ is ugly, find a more natural class design.
+ """
def __init__(self, group_list=[], bits=None):
"""Create a group list from given list of groups.
:type group_list: list of BitCountingGroup
:type bits: float or None
"""
- self.group_list = group_list
+ super(BitCountingGroupList, self).__init__(group_list)
if bits is not None:
self.bits = bits
return
bits += group.metadata.bits
self.bits = bits
- def __getitem__(self, index):
- """Return group at the index. This makes self iterable.
-
- :param index: The position in the array of groups.
- :type index: int
- :returns: Group at the position.
- :rtype: BitCountingGroup
- """
- return self.group_list[index]
-
def with_group_appended(self, group):
"""Create and return new group list with given group more than self.
:returns: New group list with added group.
:rtype: BitCountingGroupList
"""
- group_list = list(self.group_list)
+ group_list = list(self)
if group_list:
last_group = group_list[-1]
factory = BitCountingMetadataFactory(
:returns: New group list with the last group updated.
:rtype: BitCountingGroupList
"""
- last_group = self.group_list[-1]
+ group_list = list(self)
+ last_group = group_list[-1]
bits_before = last_group.metadata.bits
last_group = last_group.with_run_added(value)
- group_list = list(self.group_list)
group_list[-1] = last_group
bits = self.bits - bits_before + last_group.metadata.bits
return BitCountingGroupList(group_list, bits)
# See the License for the specific language governing permissions and
# limitations under the License.
+"""Module holding BitCountingMetadata class."""
+
import math
from AvgStdevMetadata import AvgStdevMetadata
class BitCountingMetadata(AvgStdevMetadata):
- """Class for metadata which includes information content."""
+ """Class for metadata which includes information content of a group.
+
+ The information content is based on an assumption
+ that the data consists of independent random values
+ from a normal distribution.
+ """
def __init__(self, max_value, size=0, avg=0.0, stdev=0.0, prev_avg=None):
"""Construct the metadata by computing from the values needed.
# See the License for the specific language governing permissions and
# limitations under the License.
+"""Module holding BitCountingMetadataFactory class."""
+
import math
from AvgStdevMetadata import AvgStdevMetadata
class BitCountingMetadataFactory(object):
- """Class for factory which creates bit counting metadata from data."""
+ """Class for factory which creates bit counting metadata from data.
+
+ TODO: Summarize the methods?
+ """
@staticmethod
def find_max_value(values):
# See the License for the specific language governing permissions and
# limitations under the License.
+"""Module holding ClassifiedBitCountingMetadata class."""
+
from BitCountingMetadata import BitCountingMetadata
class ClassifiedBitCountingMetadata(BitCountingMetadata):
- """Class for metadata which includes classification."""
+ """Class for metadata which includes classification.
+
+ TODO: Can we create ClassifiedMetadata and inherit (also) from that?
+ """
def __init__(
self, max_value, size=0, avg=0.0, stdev=0.0, prev_avg=None,
# See the License for the specific language governing permissions and
# limitations under the License.
-import math
+"""Module holding ClassifiedBitCountingMetadata class."""
from ClassifiedBitCountingMetadata import ClassifiedBitCountingMetadata
# See the License for the specific language governing permissions and
# limitations under the License.
+"""Module holding RunGroup class."""
+
class RunGroup(object):
+ """Effectively a named touple of data and metadata.
+
+ TODO: This feels like an abstract class.
+ Most uses assume restrictions on metadata type.
+ Can this be defined similarly to C++ templates?
+ """
def __init__(self, metadata, values):
"""Create the group from metadata and values.
bare_data = [0.0 if np.isnan(sample) else sample
for _, sample in data.iteritems()]
# TODO: Put analogous iterator into jumpavg library.
- groups = BitCountingClassifier.classify(bare_data)
+ groups = BitCountingClassifier().classify(bare_data)
groups.reverse() # Just to use .pop() for FIFO.
classification = []
avgs = []
continue
if values_left < 1 or active_group is None:
values_left = 0
- while values_left < 1: # To ignore empty groups.
+ while values_left < 1: # Ignore empty groups (should not happen).
active_group = groups.pop()
values_left = len(active_group.values)
avg = active_group.metadata.avg