From: Peter Mikus Date: Thu, 25 Sep 2025 06:48:14 +0000 (+0200) Subject: feat(etl): Pipeline enable X-Git-Url: https://gerrit.fd.io/r/gitweb?a=commitdiff_plain;h=981c8bb58aa738dff8380f64f90dc8ceb11ef25b;p=csit.git feat(etl): Pipeline enable Signed-off-by: Peter Mikus Change-Id: I1bc7f71a95514d742c58484947c11d219a713be0 --- diff --git a/csit.infra.etl/coverage_hoststack.py b/csit.infra.etl/coverage_hoststack.py index 1b4c87ef11..1c3102791c 100644 --- a/csit.infra.etl/coverage_hoststack.py +++ b/csit.infra.etl/coverage_hoststack.py @@ -141,7 +141,7 @@ paths = wr.s3.list_objects( ignore_empty=True ) -filtered_paths = [path for path in paths if "report-coverage-2506" in path] +filtered_paths = [path for path in paths if "report-coverage-2510" in path] out_sdf = process_json_to_dataframe("hoststack", filtered_paths) out_sdf.printSchema() @@ -163,7 +163,7 @@ except KeyError: try: wr.s3.to_parquet( df=out_sdf.toPandas(), - path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2506", + path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2510", dataset=True, partition_cols=["test_type", "year", "month", "day"], compression="snappy", diff --git a/csit.infra.etl/coverage_mrr.py b/csit.infra.etl/coverage_mrr.py index c371229f4d..7db04d7602 100644 --- a/csit.infra.etl/coverage_mrr.py +++ b/csit.infra.etl/coverage_mrr.py @@ -141,7 +141,7 @@ paths = wr.s3.list_objects( ignore_empty=True ) -filtered_paths = [path for path in paths if "report-coverage-2506" in path] +filtered_paths = [path for path in paths if "report-coverage-2510" in path] out_sdf = process_json_to_dataframe("mrr", filtered_paths) out_sdf.printSchema() @@ -163,7 +163,7 @@ except KeyError: try: wr.s3.to_parquet( df=out_sdf.toPandas(), - path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2506", + path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2510", dataset=True, partition_cols=["test_type", "year", "month", "day"], compression="snappy", diff --git a/csit.infra.etl/coverage_ndrpdr.py b/csit.infra.etl/coverage_ndrpdr.py index 8a4b0a96b1..d0cda880a8 100644 --- a/csit.infra.etl/coverage_ndrpdr.py +++ b/csit.infra.etl/coverage_ndrpdr.py @@ -141,7 +141,7 @@ paths = wr.s3.list_objects( ignore_empty=True ) -filtered_paths = [path for path in paths if "report-coverage-2506" in path] +filtered_paths = [path for path in paths if "report-coverage-2510" in path] out_sdf = process_json_to_dataframe("ndrpdr", filtered_paths) out_sdf.printSchema() @@ -163,7 +163,7 @@ except KeyError: try: wr.s3.to_parquet( df=out_sdf.toPandas(), - path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2506", + path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2510", dataset=True, partition_cols=["test_type", "year", "month", "day"], compression="snappy", diff --git a/csit.infra.etl/coverage_reconf.py b/csit.infra.etl/coverage_reconf.py index f24ffa998f..1452fce19d 100644 --- a/csit.infra.etl/coverage_reconf.py +++ b/csit.infra.etl/coverage_reconf.py @@ -141,7 +141,7 @@ paths = wr.s3.list_objects( ignore_empty=True ) -filtered_paths = [path for path in paths if "report-coverage-2506" in path] +filtered_paths = [path for path in paths if "report-coverage-2510" in path] out_sdf = process_json_to_dataframe("reconf", filtered_paths) out_sdf.printSchema() @@ -163,7 +163,7 @@ except KeyError: try: wr.s3.to_parquet( df=out_sdf.toPandas(), - path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2506", + path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2510", dataset=True, partition_cols=["test_type", "year", "month", "day"], compression="snappy", diff --git a/csit.infra.etl/coverage_soak.py b/csit.infra.etl/coverage_soak.py index 82b6f4fb60..24c942bb94 100644 --- a/csit.infra.etl/coverage_soak.py +++ b/csit.infra.etl/coverage_soak.py @@ -141,7 +141,7 @@ paths = wr.s3.list_objects( ignore_empty=True ) -filtered_paths = [path for path in paths if "report-coverage-2506" in path] +filtered_paths = [path for path in paths if "report-coverage-2510" in path] out_sdf = process_json_to_dataframe("soak", filtered_paths) out_sdf.printSchema() @@ -163,7 +163,7 @@ except KeyError: try: wr.s3.to_parquet( df=out_sdf.toPandas(), - path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2506", + path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2510", dataset=True, partition_cols=["test_type", "year", "month", "day"], compression="snappy", diff --git a/csit.infra.etl/iterative_hoststack.py b/csit.infra.etl/iterative_hoststack.py index 1a97962f58..779122a1e2 100644 --- a/csit.infra.etl/iterative_hoststack.py +++ b/csit.infra.etl/iterative_hoststack.py @@ -141,7 +141,7 @@ paths = wr.s3.list_objects( ignore_empty=True ) -filtered_paths = [path for path in paths if "report-iterative-2506" in path] +filtered_paths = [path for path in paths if "report-iterative-2510" in path] out_sdf = process_json_to_dataframe("hoststack", filtered_paths) out_sdf.printSchema() @@ -163,7 +163,7 @@ except KeyError: try: wr.s3.to_parquet( df=out_sdf.toPandas(), - path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2506", + path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2510", dataset=True, partition_cols=["test_type", "year", "month", "day"], compression="snappy", diff --git a/csit.infra.etl/iterative_mrr.py b/csit.infra.etl/iterative_mrr.py index c3c8ee3586..cd47b27bfb 100644 --- a/csit.infra.etl/iterative_mrr.py +++ b/csit.infra.etl/iterative_mrr.py @@ -141,7 +141,7 @@ paths = wr.s3.list_objects( ignore_empty=True ) -filtered_paths = [path for path in paths if "report-iterative-2506" in path] +filtered_paths = [path for path in paths if "report-iterative-2510" in path] out_sdf = process_json_to_dataframe("mrr", filtered_paths) out_sdf.printSchema() @@ -163,7 +163,7 @@ except KeyError: try: wr.s3.to_parquet( df=out_sdf.toPandas(), - path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2506", + path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2510", dataset=True, partition_cols=["test_type", "year", "month", "day"], compression="snappy", diff --git a/csit.infra.etl/iterative_ndrpdr.py b/csit.infra.etl/iterative_ndrpdr.py index 40d783d5db..6e2735803d 100644 --- a/csit.infra.etl/iterative_ndrpdr.py +++ b/csit.infra.etl/iterative_ndrpdr.py @@ -141,7 +141,7 @@ paths = wr.s3.list_objects( ignore_empty=True ) -filtered_paths = [path for path in paths if "report-iterative-2506" in path] +filtered_paths = [path for path in paths if "report-iterative-2510" in path] out_sdf = process_json_to_dataframe("ndrpdr", filtered_paths) out_sdf.printSchema() @@ -163,7 +163,7 @@ except KeyError: try: wr.s3.to_parquet( df=out_sdf.toPandas(), - path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2506", + path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2510", dataset=True, partition_cols=["test_type", "year", "month", "day"], compression="snappy", diff --git a/csit.infra.etl/iterative_reconf.py b/csit.infra.etl/iterative_reconf.py index e5eb2efa71..2e709837cc 100644 --- a/csit.infra.etl/iterative_reconf.py +++ b/csit.infra.etl/iterative_reconf.py @@ -141,7 +141,7 @@ paths = wr.s3.list_objects( ignore_empty=True ) -filtered_paths = [path for path in paths if "report-iterative-2506" in path] +filtered_paths = [path for path in paths if "report-iterative-2510" in path] out_sdf = process_json_to_dataframe("reconf", filtered_paths) out_sdf.show(truncate=False) @@ -164,7 +164,7 @@ except KeyError: try: wr.s3.to_parquet( df=out_sdf.toPandas(), - path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2506", + path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2510", dataset=True, partition_cols=["test_type", "year", "month", "day"], compression="snappy", diff --git a/csit.infra.etl/iterative_soak.py b/csit.infra.etl/iterative_soak.py index 5b77157321..de4c81fbe6 100644 --- a/csit.infra.etl/iterative_soak.py +++ b/csit.infra.etl/iterative_soak.py @@ -141,7 +141,7 @@ paths = wr.s3.list_objects( ignore_empty=True ) -filtered_paths = [path for path in paths if "report-iterative-2506" in path] +filtered_paths = [path for path in paths if "report-iterative-2510" in path] out_sdf = process_json_to_dataframe("soak", filtered_paths) out_sdf.printSchema() @@ -163,7 +163,7 @@ except KeyError: try: wr.s3.to_parquet( df=out_sdf.toPandas(), - path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2506", + path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2510", dataset=True, partition_cols=["test_type", "year", "month", "day"], compression="snappy", diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/versions.tf b/fdio.infra.terraform/terraform-nomad-pyspark-etl/versions.tf index cf67cc25c7..44ed2a29a4 100644 --- a/fdio.infra.terraform/terraform-nomad-pyspark-etl/versions.tf +++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/versions.tf @@ -7,11 +7,11 @@ terraform { required_providers { nomad = { source = "hashicorp/nomad" - version = ">= 2.3.0" + version = ">= 2.5.0" } vault = { version = ">= 4.6.0" } } - required_version = ">= 1.10.4" + required_version = ">= 1.12.1" }