X-Git-Url: https://gerrit.fd.io/r/gitweb?p=csit.git;a=blobdiff_plain;f=csit.infra.etl%2Fcoverage_device_rls2210.py;fp=csit.infra.etl%2Fcoverage_rls2206.py;h=9c9e1c96031db6ca2b605b68f1360abcdabffe85;hp=4e2619d924b272ca146d3e024cac2d82c2694d2b;hb=bff439b69ee71b654b1da92564ff62de7327fe71;hpb=3f16ce1bbca46437c648d75f6a15d23dae0b8fc1 diff --git a/csit.infra.etl/coverage_rls2206.py b/csit.infra.etl/coverage_device_rls2210.py similarity index 79% rename from csit.infra.etl/coverage_rls2206.py rename to csit.infra.etl/coverage_device_rls2210.py index 4e2619d924..9c9e1c9603 100644 --- a/csit.infra.etl/coverage_rls2206.py +++ b/csit.infra.etl/coverage_device_rls2210.py @@ -141,31 +141,30 @@ paths = wr.s3.list_objects( ignore_empty=True ) -filtered_paths = [path for path in paths if "report-coverage-2206" in path] - -for schema_name in ["mrr", "ndrpdr", "soak", "device"]: - out_sdf = process_json_to_dataframe(schema_name, filtered_paths) - out_sdf.printSchema() - out_sdf = out_sdf \ - .withColumn("year", lit(datetime.now().year)) \ - .withColumn("month", lit(datetime.now().month)) \ - .withColumn("day", lit(datetime.now().day)) \ - .repartition(1) - - try: - wr.s3.to_parquet( - df=out_sdf.toPandas(), - path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2206", - dataset=True, - partition_cols=["test_type", "year", "month", "day"], - compression="snappy", - use_threads=True, - mode="overwrite_partitions", - boto3_session=session.Session( - aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"], - aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"], - region_name=environ["OUT_AWS_DEFAULT_REGION"] - ) +filtered_paths = [path for path in paths if "report-coverage-2210" in path] + +out_sdf = process_json_to_dataframe("mrr", filtered_paths) +out_sdf.printSchema() +out_sdf = out_sdf \ + .withColumn("year", lit(datetime.now().year)) \ + .withColumn("month", lit(datetime.now().month)) \ + .withColumn("day", lit(datetime.now().day)) \ + .repartition(1) + +try: + wr.s3.to_parquet( + df=out_sdf.toPandas(), + path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2210", + dataset=True, + partition_cols=["test_type", "year", "month", "day"], + compression="snappy", + use_threads=True, + mode="overwrite_partitions", + boto3_session=session.Session( + aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"], + aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"], + region_name=environ["OUT_AWS_DEFAULT_REGION"] ) - except EmptyDataFrame: - pass + ) +except EmptyDataFrame: + pass