From 22a45eda880bee367ff8937d8e764cd41905a0cd Mon Sep 17 00:00:00 2001
From: Peter Mikus
Date: Fri, 5 Aug 2022 08:01:59 +0000
Subject: [PATCH] feat(uti): Add some more debug tools

Signed-off-by: Peter Mikus
Change-Id: I1dfe1782334c8415fe5dbcdba24781947076639d
---
 resources/tools/dash/app/pal/data/data.py   | 54 +++++++++++++++++++++++++++++
 resources/tools/dash/app/pal/data/data.yaml |  6 ++++
 resources/tools/dash/app/pal/debug.py       | 45 ++++++++++++++++++++++++
 3 files changed, 105 insertions(+)
 create mode 100644 resources/tools/dash/app/pal/debug.py

diff --git a/resources/tools/dash/app/pal/data/data.py b/resources/tools/dash/app/pal/data/data.py
index 0956333e34..77fd113a9c 100644
--- a/resources/tools/dash/app/pal/data/data.py
+++ b/resources/tools/dash/app/pal/data/data.py
@@ -113,6 +113,48 @@ class Data:
                 f"specified.\n{err}"
             )
 
+    def _get_list_of_files(self,
+        path,
+        last_modified_begin=None,
+        last_modified_end=None,
+        days=None) -> list:
+        """Get the list of parquet files stored in S3 compatible storage.
+
+        :param path: S3 prefix (accepts Unix shell-style wildcards)
+            (e.g. s3://bucket/prefix) or list of S3 objects paths
+            (e.g. [s3://bucket/key0, s3://bucket/key1]).
+        :param last_modified_begin: Filter the s3 files by the Last modified
+            date of the object. The filter is applied only after listing all
+            s3 files. Overridden when days is set.
+        :param last_modified_end: Filter the s3 files by the Last modified date
+            of the object. The filter is applied only after listing all files.
+        :param days: Number of days to filter, counted back from now (UTC).
+        :type path: Union[str, List[str]]
+        :type last_modified_begin: datetime, optional
+        :type last_modified_end: datetime, optional
+        :type days: integer, optional
+        :returns: List of file names, empty if the listing failed.
+        :rtype: List
+        """
+        file_list = []  # Stays empty if the listing below fails.
+        if days:
+            last_modified_begin = datetime.now(tz=UTC) - timedelta(days=days)
+        try:
+            file_list = wr.s3.list_objects(
+                path=path,
+                suffix="parquet",
+                last_modified_begin=last_modified_begin,
+                last_modified_end=last_modified_end
+            )
+            if self._debug:
+                logging.info("\n".join(file_list))
+        except NoFilesFound as err:
+            logging.error(f"No parquets found.\n{err}")
+        except EmptyDataFrame as err:
+            logging.error(f"No data.\n{err}")
+
+        return file_list
+
     def _create_dataframe_from_parquet(self,
         path, partition_filter=None,
         columns=None,
@@ -142,12 +184,14 @@ class Data:
             files.
         :param last_modified_end: Filter the s3 files by the Last modified date
             of the object. The filter is applied only after list all s3 files.
+        :param days: Number of days to filter.
         :type path: Union[str, List[str]]
         :type partition_filter: Callable[[Dict[str, str]], bool], optional
         :type columns: List[str], optional
         :type validate_schema: bool, optional
         :type last_modified_begin: datetime, optional
         :type last_modified_end: datetime, optional
+        :type days: integer, optional
         :returns: Pandas DataFrame or None if DataFrame cannot be fetched.
         :rtype: DataFrame
         """
@@ -183,6 +227,16 @@ class Data:
         self._data = df
         return df
 
+    def check_datasets(self, days: int=None):
+        """List the parquet files of the trending and statistics data sets.
+
+        :param days: Number of days back to the past for which the files are
+            listed.
+        :type days: int
+        """
+        self._get_list_of_files(path=self._get_path("trending"), days=days)
+        self._get_list_of_files(path=self._get_path("statistics"), days=days)
+
     def read_stats(self, days: int=None) -> tuple:
         """Read statistics from parquet.
 
diff --git a/resources/tools/dash/app/pal/data/data.yaml b/resources/tools/dash/app/pal/data/data.yaml
index 59533f97a4..396f1b1638 100644
--- a/resources/tools/dash/app/pal/data/data.yaml
+++ b/resources/tools/dash/app/pal/data/data.yaml
@@ -30,6 +30,12 @@ statistics-trending-mrr:
     - passed
     - test_id
     - result_receive_rate_rate_avg
+trending:
+  path: s3://fdio-docs-s3-cloudfront-index/csit/parquet/trending
+  columns:
+    - job
+    - build
+    - start_time
 trending-mrr:
   path: s3://fdio-docs-s3-cloudfront-index/csit/parquet/trending
   columns:
diff --git a/resources/tools/dash/app/pal/debug.py b/resources/tools/dash/app/pal/debug.py
new file mode 100644
index 0000000000..f0543820b1
--- /dev/null
+++ b/resources/tools/dash/app/pal/debug.py
@@ -0,0 +1,45 @@
+# Copyright (c) 2022 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+
+from data.data import Data
+from utils.constants import Constants as C
+
+logging.basicConfig(
+    format=u"%(asctime)s: %(levelname)s: %(message)s",
+    datefmt=u"%Y/%m/%d %H:%M:%S",
+    level=logging.INFO
+)
+
+# Set the time period for data fetch; unset or too long means the maximum.
+if C.TIME_PERIOD is None or C.TIME_PERIOD > C.MAX_TIME_PERIOD:
+    time_period = C.MAX_TIME_PERIOD
+else:
+    time_period = C.TIME_PERIOD
+
+#data_mrr = Data(
+#    data_spec_file=C.DATA_SPEC_FILE,
+#    debug=True
+#).read_trending_mrr(days=time_period)
+#
+#data_ndrpdr = Data(
+#    data_spec_file=C.DATA_SPEC_FILE,
+#    debug=True
+#).read_trending_ndrpdr(days=time_period)
+
+# check_datasets() returns nothing, its output is the log; bind no name.
+Data(
+    data_spec_file=C.DATA_SPEC_FILE,
+    debug=True
+).check_datasets(days=time_period)
-- 
2.16.6