From: Peter Mikus Date: Thu, 24 Feb 2022 13:19:54 +0000 (+0100) Subject: feat(uti): Data source X-Git-Url: https://gerrit.fd.io/r/gitweb?p=csit.git;a=commitdiff_plain;h=646c242bfaea75db747df8c178c050499994c789;hp=6d99a2a08376860e18701174146506046c46b92c feat(uti): Data source Signed-off-by: Peter Mikus Change-Id: Idbd1f0f5a4b08909c2c8e783da62024c850ec90a --- diff --git a/resources/tools/dash/app/pal/report/data.py b/resources/tools/dash/app/pal/report/data.py index 43e0239228..848259be4a 100644 --- a/resources/tools/dash/app/pal/report/data.py +++ b/resources/tools/dash/app/pal/report/data.py @@ -13,14 +13,256 @@ """Prepare data for Plotly Dash.""" -import pandas as pd +from logging import info +from time import time +import awswrangler as wr +from awswrangler.exceptions import EmptyDataFrame, NoFilesFound +from boto3 import session -def create_dataframe(): - """Create Pandas DataFrame from local CSV. + +S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index" + +def create_dataframe_from_parquet( + path, partition_filter=None, columns=None, + validate_schema=False, last_modified_begin=None, + last_modified_end=None): + """Read parquet stored in S3 compatible storage and return a Pandas + DataFrame. + + :param path: S3 prefix (accepts Unix shell-style wildcards) (e.g. + s3://bucket/prefix) or list of S3 objects paths (e.g. [s3://bucket/key0, + s3://bucket/key1]). + :param partition_filter: Callback Function filters to apply on PARTITION + columns (PUSH-DOWN filter). This function MUST receive a single argument + (Dict[str, str]) where keys are partitions names and values are + partitions values. Partitions values will be always strings extracted + from S3. This function MUST return a bool, True to read the partition or + False to ignore it. Ignored if dataset=False. + :param columns: Names of columns to read from the file(s). + :param validate_schema: Check that individual file schemas are all the + same / compatible. Schemas within a folder prefix should all be the + same. 
Disable if you have schemas that are different and want to disable + this check. + :param last_modified_begin: Filter the s3 files by the Last modified date of + the object. The filter is applied only after listing all S3 files. + :param last_modified_end: Filter the s3 files by the Last modified date of + the object. The filter is applied only after listing all S3 files. + :type path: Union[str, List[str]] + :type partition_filter: Callable[[Dict[str, str]], bool], optional + :type columns: List[str], optional + :type validate_schema: bool, optional + :type last_modified_begin: datetime, optional + :type last_modified_end: datetime, optional + :returns: Pandas DataFrame or None if no parquet files were found. + :rtype: DataFrame + """ + df = None + start = time() + try: + df = wr.s3.read_parquet( + path=path, + path_suffix="parquet", + ignore_empty=True, + validate_schema=validate_schema, + use_threads=True, + dataset=True, + columns=columns, + partition_filter=partition_filter, + last_modified_begin=last_modified_begin, + last_modified_end=last_modified_end + ) + info(f"Create dataframe {path} took: {time() - start}") + info(df) + info(df.info(memory_usage="deep")) + except NoFilesFound: + return df + + return df + + +def read_stats(): + """Read Suite Result Analysis data partition from parquet. + """ + lambda_f = lambda part: True if part["stats_type"] == "sra" else False + + return create_dataframe_from_parquet( + path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/stats", + partition_filter=lambda_f + ) + +def read_trending_mrr(): + """Read MRR data partition from trending parquet. 
+ """ + lambda_f = lambda part: True if part["test_type"] == "mrr" else False + + return create_dataframe_from_parquet( + path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/trending", + partition_filter=lambda_f, + columns=["job", "build", "dut_type", "dut_version", "hosts", + "start_time", "passed", "test_id", "test_name_long", + "test_name_short", "version", + "result_receive_rate_rate_avg", + "result_receive_rate_rate_stdev", + "result_receive_rate_rate_unit", + "result_receive_rate_rate_values" + ] + ) + +def read_iterative_mrr(): + """Read MRR data partition from iterative parquet. + """ + lambda_f = lambda part: True if part["test_type"] == "mrr" else False + + return create_dataframe_from_parquet( + path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2202", + partition_filter=lambda_f, + columns=["job", "build", "dut_type", "dut_version", "hosts", + "start_time", "passed", "test_id", "test_name_long", + "test_name_short", "version", + "result_receive_rate_rate_avg", + "result_receive_rate_rate_stdev", + "result_receive_rate_rate_unit", + "result_receive_rate_rate_values" + ] + ) + +def read_trending_ndrpdr(): + """Read NDRPDR data partition from trending parquet. 
+ """ + lambda_f = lambda part: True if part["test_type"] == "ndrpdr" else False + + return create_dataframe_from_parquet( + path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/trending", + partition_filter=lambda_f, + columns=["job", "build", "dut_type", "dut_version", "hosts", + "start_time", "passed", "test_id", "test_name_long", + "test_name_short", "version", + "result_pdr_upper_rate_unit", + "result_pdr_upper_rate_value", + "result_pdr_upper_bandwidth_unit", + "result_pdr_upper_bandwidth_value", + "result_pdr_lower_rate_unit", + "result_pdr_lower_rate_value", + "result_pdr_lower_bandwidth_unit", + "result_pdr_lower_bandwidth_value", + "result_ndr_upper_rate_unit", + "result_ndr_upper_rate_value", + "result_ndr_upper_bandwidth_unit", + "result_ndr_upper_bandwidth_value", + "result_ndr_lower_rate_unit", + "result_ndr_lower_rate_value", + "result_ndr_lower_bandwidth_unit", + "result_ndr_lower_bandwidth_value", + "result_latency_reverse_pdr_90_avg", + "result_latency_reverse_pdr_90_hdrh", + "result_latency_reverse_pdr_90_max", + "result_latency_reverse_pdr_90_min", + "result_latency_reverse_pdr_90_unit", + "result_latency_reverse_pdr_50_avg", + "result_latency_reverse_pdr_50_hdrh", + "result_latency_reverse_pdr_50_max", + "result_latency_reverse_pdr_50_min", + "result_latency_reverse_pdr_50_unit", + "result_latency_reverse_pdr_10_avg", + "result_latency_reverse_pdr_10_hdrh", + "result_latency_reverse_pdr_10_max", + "result_latency_reverse_pdr_10_min", + "result_latency_reverse_pdr_10_unit", + "result_latency_reverse_pdr_0_avg", + "result_latency_reverse_pdr_0_hdrh", + "result_latency_reverse_pdr_0_max", + "result_latency_reverse_pdr_0_min", + "result_latency_reverse_pdr_0_unit", + "result_latency_forward_pdr_90_avg", + "result_latency_forward_pdr_90_hdrh", + "result_latency_forward_pdr_90_max", + "result_latency_forward_pdr_90_min", + "result_latency_forward_pdr_90_unit", + "result_latency_forward_pdr_50_avg", + "result_latency_forward_pdr_50_hdrh", + 
"result_latency_forward_pdr_50_max", + "result_latency_forward_pdr_50_min", + "result_latency_forward_pdr_50_unit", + "result_latency_forward_pdr_10_avg", + "result_latency_forward_pdr_10_hdrh", + "result_latency_forward_pdr_10_max", + "result_latency_forward_pdr_10_min", + "result_latency_forward_pdr_10_unit", + "result_latency_forward_pdr_0_avg", + "result_latency_forward_pdr_0_hdrh", + "result_latency_forward_pdr_0_max", + "result_latency_forward_pdr_0_min", + "result_latency_forward_pdr_0_unit" + ] + ) + +def read_iterative_ndrpdr(): + """Read NDRPDR data partition from iterative parquet. """ + lambda_f = lambda part: True if part["test_type"] == "ndrpdr" else False - return pd.read_csv( - u"https://s3-docs.fd.io/csit/master/trending/_static/vpp/" - u"csit-vpp-perf-mrr-daily-master-2n-skx-trending.csv" + return create_dataframe_from_parquet( + path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2202", + partition_filter=lambda_f, + columns=["job", "build", "dut_type", "dut_version", "hosts", + "start_time", "passed", "test_id", "test_name_long", + "test_name_short", "version", + "result_pdr_upper_rate_unit", + "result_pdr_upper_rate_value", + "result_pdr_upper_bandwidth_unit", + "result_pdr_upper_bandwidth_value", + "result_pdr_lower_rate_unit", + "result_pdr_lower_rate_value", + "result_pdr_lower_bandwidth_unit", + "result_pdr_lower_bandwidth_value", + "result_ndr_upper_rate_unit", + "result_ndr_upper_rate_value", + "result_ndr_upper_bandwidth_unit", + "result_ndr_upper_bandwidth_value", + "result_ndr_lower_rate_unit", + "result_ndr_lower_rate_value", + "result_ndr_lower_bandwidth_unit", + "result_ndr_lower_bandwidth_value", + "result_latency_reverse_pdr_90_avg", + "result_latency_reverse_pdr_90_hdrh", + "result_latency_reverse_pdr_90_max", + "result_latency_reverse_pdr_90_min", + "result_latency_reverse_pdr_90_unit", + "result_latency_reverse_pdr_50_avg", + "result_latency_reverse_pdr_50_hdrh", + "result_latency_reverse_pdr_50_max", + 
"result_latency_reverse_pdr_50_min", + "result_latency_reverse_pdr_50_unit", + "result_latency_reverse_pdr_10_avg", + "result_latency_reverse_pdr_10_hdrh", + "result_latency_reverse_pdr_10_max", + "result_latency_reverse_pdr_10_min", + "result_latency_reverse_pdr_10_unit", + "result_latency_reverse_pdr_0_avg", + "result_latency_reverse_pdr_0_hdrh", + "result_latency_reverse_pdr_0_max", + "result_latency_reverse_pdr_0_min", + "result_latency_reverse_pdr_0_unit", + "result_latency_forward_pdr_90_avg", + "result_latency_forward_pdr_90_hdrh", + "result_latency_forward_pdr_90_max", + "result_latency_forward_pdr_90_min", + "result_latency_forward_pdr_90_unit", + "result_latency_forward_pdr_50_avg", + "result_latency_forward_pdr_50_hdrh", + "result_latency_forward_pdr_50_max", + "result_latency_forward_pdr_50_min", + "result_latency_forward_pdr_50_unit", + "result_latency_forward_pdr_10_avg", + "result_latency_forward_pdr_10_hdrh", + "result_latency_forward_pdr_10_max", + "result_latency_forward_pdr_10_min", + "result_latency_forward_pdr_10_unit", + "result_latency_forward_pdr_0_avg", + "result_latency_forward_pdr_0_hdrh", + "result_latency_forward_pdr_0_max", + "result_latency_forward_pdr_0_min", + "result_latency_forward_pdr_0_unit" + ] ) diff --git a/resources/tools/dash/app/pal/report/report.py b/resources/tools/dash/app/pal/report/report.py index d22a0b6705..769a6dd63e 100644 --- a/resources/tools/dash/app/pal/report/report.py +++ b/resources/tools/dash/app/pal/report/report.py @@ -18,10 +18,10 @@ import dash from dash import dcc from dash import html from dash import dash_table -import numpy as np -import pandas as pd -from .data import create_dataframe +from .data import read_stats +from .data import read_trending_mrr, read_trending_ndrpdr +from .data import read_iterative_mrr, read_iterative_ndrpdr from .layout import html_layout @@ -43,34 +43,66 @@ def init_report(server): ], ) - # Load DataFrame - df = create_dataframe() - # Custom HTML layout 
dash_app.index_string = html_layout # Create Layout dash_app.layout = html.Div( children=[ - create_data_table(df), + html.Div( + children=create_data_table( + read_stats().dropna(), + u"database-table-stats" + ) + ), + html.Div( + children=create_data_table( + read_trending_mrr().dropna(), + u"database-table-mrr" + ) + ), + html.Div( + children=create_data_table( + read_trending_ndrpdr().dropna(), + u"database-table-ndrpdr" + ) + ), + html.Div( + children=create_data_table( + read_iterative_mrr().dropna(), + u"database-table-iterative-mrr" + ) + ), + html.Div( + children=create_data_table( + read_iterative_ndrpdr().dropna(), + u"database-table-iterative-ndrpdr" + ) + ) ], id=u"dash-container", ) return dash_app.server -def create_data_table(df): +def create_data_table(df, id): """Create Dash datatable from Pandas DataFrame. DEMO """ table = dash_table.DataTable( - id=u"database-table", + id=id, columns=[{u"name": i, u"id": i} for i in df.columns], data=df.to_dict(u"records"), + fixed_rows={'headers': True}, sort_action=u"native", sort_mode=u"native", page_size=5, + style_header={ + 'overflow': 'hidden', + 'textOverflow': 'ellipsis', + 'minWidth': 95, 'maxWidth': 95, 'width': 95, + } ) return table diff --git a/resources/tools/dash/app/requirements.txt b/resources/tools/dash/app/requirements.txt index befdc8a83b..5c3a5488f0 100644 --- a/resources/tools/dash/app/requirements.txt +++ b/resources/tools/dash/app/requirements.txt @@ -1,4 +1,5 @@ attrs==21.2.0 +awswrangler==2.14.0 Brotli==1.0.9 click==8.0.3 dash==2.0.0 diff --git a/resources/tools/dash/docker-compose.yaml b/resources/tools/dash/docker-compose.yaml index 335bcdaf94..4fe12bd11e 100644 --- a/resources/tools/dash/docker-compose.yaml +++ b/resources/tools/dash/docker-compose.yaml @@ -16,4 +16,5 @@ services: - "5000:5000" volumes: - "./app/:/app" + - "/home/vagrant/.aws:/root/.aws" working_dir: "/app"