X-Git-Url: https://gerrit.fd.io/r/gitweb?p=csit.git;a=blobdiff_plain;f=resources%2Ftools%2Fdash%2Fapp%2Fpal%2Freport%2Fdata.py;fp=resources%2Ftools%2Fdash%2Fapp%2Fpal%2Freport%2Fdata.py;h=0000000000000000000000000000000000000000;hp=848259be4abba498ad24aaae5ee497ade95e41db;hb=3343fe81729eb4005319ca15b1e6881630d38c5b;hpb=099b961a0aa234f870ff60e36492e324bb2abe11
diff --git a/resources/tools/dash/app/pal/report/data.py b/resources/tools/dash/app/pal/report/data.py
deleted file mode 100644
index 848259be4a..0000000000
--- a/resources/tools/dash/app/pal/report/data.py
+++ /dev/null
@@ -1,268 +0,0 @@
-# Copyright (c) 2022 Cisco and/or its affiliates.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at:
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Prepare data for Plotly Dash."""
-
-from logging import info
-from time import time
-
-import awswrangler as wr
-from awswrangler.exceptions import EmptyDataFrame, NoFilesFound
-from boto3 import session
-
-
-S3_DOCS_BUCKET = "fdio-docs-s3-cloudfront-index"
-
-
-def create_dataframe_from_parquet(
-        path, partition_filter=None, columns=None,
-        validate_schema=False, last_modified_begin=None,
-        last_modified_end=None):
-    """Read parquet files stored in S3-compatible storage and return a
-    Pandas DataFrame.
-
-    :param path: S3 prefix (accepts Unix shell-style wildcards), e.g.
-        s3://bucket/prefix, or a list of S3 object paths, e.g.
-        [s3://bucket/key0, s3://bucket/key1].
-    :param partition_filter: Callback function applied to PARTITION columns
-        (push-down filter). The function MUST receive a single argument
-        (Dict[str, str]) where keys are partition names and values are
-        partition values. Partition values are always strings extracted
-        from S3. The function MUST return a bool: True to read the
-        partition, False to ignore it. Ignored if dataset=False.
-    :param columns: Names of columns to read from the file(s).
-    :param validate_schema: Check that individual file schemas are all the
-        same / compatible. Schemas within a folder prefix should all be the
-        same. Disable if you have differing schemas and want to skip this
-        check.
-    :param last_modified_begin: Filter the S3 files by the last modified
-        date of the object. The filter is applied only after listing all
-        S3 files.
-    :param last_modified_end: Filter the S3 files by the last modified
-        date of the object. The filter is applied only after listing all
-        S3 files.
-    :type path: Union[str, List[str]]
-    :type partition_filter: Callable[[Dict[str, str]], bool], optional
-    :type columns: List[str], optional
-    :type validate_schema: bool, optional
-    :type last_modified_begin: datetime, optional
-    :type last_modified_end: datetime, optional
-    :returns: Pandas DataFrame, or None if the DataFrame cannot be fetched.
-    :rtype: DataFrame
-    """
-    df = None
-    start = time()
-    try:
-        df = wr.s3.read_parquet(
-            path=path,
-            path_suffix="parquet",
-            ignore_empty=True,
-            validate_schema=validate_schema,
-            use_threads=True,
-            dataset=True,
-            columns=columns,
-            partition_filter=partition_filter,
-            last_modified_begin=last_modified_begin,
-            last_modified_end=last_modified_end
-        )
-        info(f"Creating dataframe from {path} took {time() - start:.2f}s")
-        info(df)
-        # df.info() prints its summary and returns None, so it must not be
-        # passed to a logging call.
-        df.info(memory_usage="deep")
-    except (EmptyDataFrame, NoFilesFound):
-        # No (non-empty) parquet files under the prefix; df is still None.
-        return df
-
-    return df
-
-
-def read_stats():
-    """Read Suite Result Analysis data partition from parquet.
-    """
-    lambda_f = lambda part: part["stats_type"] == "sra"
-
-    return create_dataframe_from_parquet(
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/stats",
-        partition_filter=lambda_f
-    )
-
-
-def read_trending_mrr():
-    """Read MRR data partition from trending parquet.
-    """
-    lambda_f = lambda part: part["test_type"] == "mrr"
-
-    return create_dataframe_from_parquet(
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/trending",
-        partition_filter=lambda_f,
-        columns=["job", "build", "dut_type", "dut_version", "hosts",
-            "start_time", "passed", "test_id", "test_name_long",
-            "test_name_short", "version",
-            "result_receive_rate_rate_avg",
-            "result_receive_rate_rate_stdev",
-            "result_receive_rate_rate_unit",
-            "result_receive_rate_rate_values"
-        ]
-    )
-
-
-def read_iterative_mrr():
-    """Read MRR data partition from iterative parquet.
-    """
-    lambda_f = lambda part: part["test_type"] == "mrr"
-
-    return create_dataframe_from_parquet(
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2202",
-        partition_filter=lambda_f,
-        columns=["job", "build", "dut_type", "dut_version", "hosts",
-            "start_time", "passed", "test_id", "test_name_long",
-            "test_name_short", "version",
-            "result_receive_rate_rate_avg",
-            "result_receive_rate_rate_stdev",
-            "result_receive_rate_rate_unit",
-            "result_receive_rate_rate_values"
-        ]
-    )
-
-
-def read_trending_ndrpdr():
-    """Read NDRPDR data partition from trending parquet.
- """ - lambda_f = lambda part: True if part["test_type"] == "ndrpdr" else False - - return create_dataframe_from_parquet( - path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/trending", - partition_filter=lambda_f, - columns=["job", "build", "dut_type", "dut_version", "hosts", - "start_time", "passed", "test_id", "test_name_long", - "test_name_short", "version", - "result_pdr_upper_rate_unit", - "result_pdr_upper_rate_value", - "result_pdr_upper_bandwidth_unit", - "result_pdr_upper_bandwidth_value", - "result_pdr_lower_rate_unit", - "result_pdr_lower_rate_value", - "result_pdr_lower_bandwidth_unit", - "result_pdr_lower_bandwidth_value", - "result_ndr_upper_rate_unit", - "result_ndr_upper_rate_value", - "result_ndr_upper_bandwidth_unit", - "result_ndr_upper_bandwidth_value", - "result_ndr_lower_rate_unit", - "result_ndr_lower_rate_value", - "result_ndr_lower_bandwidth_unit", - "result_ndr_lower_bandwidth_value", - "result_latency_reverse_pdr_90_avg", - "result_latency_reverse_pdr_90_hdrh", - "result_latency_reverse_pdr_90_max", - "result_latency_reverse_pdr_90_min", - "result_latency_reverse_pdr_90_unit", - "result_latency_reverse_pdr_50_avg", - "result_latency_reverse_pdr_50_hdrh", - "result_latency_reverse_pdr_50_max", - "result_latency_reverse_pdr_50_min", - "result_latency_reverse_pdr_50_unit", - "result_latency_reverse_pdr_10_avg", - "result_latency_reverse_pdr_10_hdrh", - "result_latency_reverse_pdr_10_max", - "result_latency_reverse_pdr_10_min", - "result_latency_reverse_pdr_10_unit", - "result_latency_reverse_pdr_0_avg", - "result_latency_reverse_pdr_0_hdrh", - "result_latency_reverse_pdr_0_max", - "result_latency_reverse_pdr_0_min", - "result_latency_reverse_pdr_0_unit", - "result_latency_forward_pdr_90_avg", - "result_latency_forward_pdr_90_hdrh", - "result_latency_forward_pdr_90_max", - "result_latency_forward_pdr_90_min", - "result_latency_forward_pdr_90_unit", - "result_latency_forward_pdr_50_avg", - "result_latency_forward_pdr_50_hdrh", - "result_latency_forward_pdr_50_max", - "result_latency_forward_pdr_50_min", - "result_latency_forward_pdr_50_unit", - "result_latency_forward_pdr_10_avg", - "result_latency_forward_pdr_10_hdrh", - "result_latency_forward_pdr_10_max", - "result_latency_forward_pdr_10_min", - "result_latency_forward_pdr_10_unit", - "result_latency_forward_pdr_0_avg", - "result_latency_forward_pdr_0_hdrh", - "result_latency_forward_pdr_0_max", - "result_latency_forward_pdr_0_min", - "result_latency_forward_pdr_0_unit" - ] - ) - -def read_iterative_ndrpdr(): - """Read NDRPDR data partition from parquet. 
- """ - lambda_f = lambda part: True if part["test_type"] == "ndrpdr" else False - - return create_dataframe_from_parquet( - path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2202", - partition_filter=lambda_f, - columns=["job", "build", "dut_type", "dut_version", "hosts", - "start_time", "passed", "test_id", "test_name_long", - "test_name_short", "version", - "result_pdr_upper_rate_unit", - "result_pdr_upper_rate_value", - "result_pdr_upper_bandwidth_unit", - "result_pdr_upper_bandwidth_value", - "result_pdr_lower_rate_unit", - "result_pdr_lower_rate_value", - "result_pdr_lower_bandwidth_unit", - "result_pdr_lower_bandwidth_value", - "result_ndr_upper_rate_unit", - "result_ndr_upper_rate_value", - "result_ndr_upper_bandwidth_unit", - "result_ndr_upper_bandwidth_value", - "result_ndr_lower_rate_unit", - "result_ndr_lower_rate_value", - "result_ndr_lower_bandwidth_unit", - "result_ndr_lower_bandwidth_value", - "result_latency_reverse_pdr_90_avg", - "result_latency_reverse_pdr_90_hdrh", - "result_latency_reverse_pdr_90_max", - "result_latency_reverse_pdr_90_min", - "result_latency_reverse_pdr_90_unit", - "result_latency_reverse_pdr_50_avg", - "result_latency_reverse_pdr_50_hdrh", - "result_latency_reverse_pdr_50_max", - "result_latency_reverse_pdr_50_min", - "result_latency_reverse_pdr_50_unit", - "result_latency_reverse_pdr_10_avg", - "result_latency_reverse_pdr_10_hdrh", - "result_latency_reverse_pdr_10_max", - "result_latency_reverse_pdr_10_min", - "result_latency_reverse_pdr_10_unit", - "result_latency_reverse_pdr_0_avg", - "result_latency_reverse_pdr_0_hdrh", - "result_latency_reverse_pdr_0_max", - "result_latency_reverse_pdr_0_min", - "result_latency_reverse_pdr_0_unit", - "result_latency_forward_pdr_90_avg", - "result_latency_forward_pdr_90_hdrh", - "result_latency_forward_pdr_90_max", - "result_latency_forward_pdr_90_min", - "result_latency_forward_pdr_90_unit", - "result_latency_forward_pdr_50_avg", - "result_latency_forward_pdr_50_hdrh", - "result_latency_forward_pdr_50_max", - "result_latency_forward_pdr_50_min", - "result_latency_forward_pdr_50_unit", - "result_latency_forward_pdr_10_avg", - "result_latency_forward_pdr_10_hdrh", - "result_latency_forward_pdr_10_max", - "result_latency_forward_pdr_10_min", - "result_latency_forward_pdr_10_unit", - "result_latency_forward_pdr_0_avg", - "result_latency_forward_pdr_0_hdrh", - "result_latency_forward_pdr_0_max", - "result_latency_forward_pdr_0_min", - "result_latency_forward_pdr_0_unit" - ] - )