X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=csit.infra.etl%2Fiterative_mrr_rls2210.py;fp=csit.infra.etl%2Fiterative_rls2206.py;h=b7a8dbcbfa03ec90c09c3305dbf7fb2114487b3d;hb=refs%2Fchanges%2F62%2F37262%2F3;hp=88c644b625362d20d3ea65378d14ef23cb81fdc3;hpb=3f16ce1bbca46437c648d75f6a15d23dae0b8fc1;p=csit.git diff --git a/csit.infra.etl/iterative_rls2206.py b/csit.infra.etl/iterative_mrr_rls2210.py similarity index 81% rename from csit.infra.etl/iterative_rls2206.py rename to csit.infra.etl/iterative_mrr_rls2210.py index 88c644b625..b7a8dbcbfa 100644 --- a/csit.infra.etl/iterative_rls2206.py +++ b/csit.infra.etl/iterative_mrr_rls2210.py @@ -141,31 +141,30 @@ paths = wr.s3.list_objects( ignore_empty=True ) -filtered_paths = [path for path in paths if "report-iterative-2206" in path] - -for schema_name in ["mrr", "ndrpdr", "soak"]: - out_sdf = process_json_to_dataframe(schema_name, filtered_paths) - out_sdf.printSchema() - out_sdf = out_sdf \ - .withColumn("year", lit(datetime.now().year)) \ - .withColumn("month", lit(datetime.now().month)) \ - .withColumn("day", lit(datetime.now().day)) \ - .repartition(1) - - try: - wr.s3.to_parquet( - df=out_sdf.toPandas(), - path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2206", - dataset=True, - partition_cols=["test_type", "year", "month", "day"], - compression="snappy", - use_threads=True, - mode="overwrite_partitions", - boto3_session=session.Session( - aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"], - aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"], - region_name=environ["OUT_AWS_DEFAULT_REGION"] - ) +filtered_paths = [path for path in paths if "report-iterative-2210" in path] + +out_sdf = process_json_to_dataframe("mrr", filtered_paths) +out_sdf.printSchema() +out_sdf = out_sdf \ + .withColumn("year", lit(datetime.now().year)) \ + .withColumn("month", lit(datetime.now().month)) \ + .withColumn("day", lit(datetime.now().day)) \ + .repartition(1) + +try: + wr.s3.to_parquet( + df=out_sdf.toPandas(), + path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2210", + dataset=True, + partition_cols=["test_type", "year", "month", "day"], + compression="snappy", + use_threads=True, + mode="overwrite_partitions", + boto3_session=session.Session( + aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"], + aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"], + region_name=environ["OUT_AWS_DEFAULT_REGION"] ) - except EmptyDataFrame: - pass + ) +except EmptyDataFrame: + pass