feat(etl): Release pipelines 16/40916/2
author pmikus <peter.mikus@protonmail.ch>
Wed, 22 May 2024 12:30:49 +0000 (14:30 +0200)
committer Peter Mikus <peter.mikus@protonmail.ch>
Wed, 22 May 2024 12:56:34 +0000 (12:56 +0000)
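
Roll the release ETL pipelines from rls2402 to rls2406: rename the
coverage and iterative PySpark jobs and their Nomad job templates,
read the S3 bucket names from the environment with the previous
values as defaults, fall back to the default boto3 session when the
OUT_AWS_* credentials are not provided, and drop verbose DataFrame
show() calls.
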
Signed-off-by: pmikus <peter.mikus@protonmail.ch>
Change-Id: I4ce20267b4747bf1901b6175e0ec5936b583a510

34 files changed:
csit.infra.etl/coverage_device_rls2406.py [moved from csit.infra.etl/coverage_device_rls2402.py with 89% similarity]
csit.infra.etl/coverage_hoststack_rls2406.py [moved from csit.infra.etl/coverage_hoststack_rls2402.py with 89% similarity]
csit.infra.etl/coverage_mrr_rls2406.py [moved from csit.infra.etl/coverage_mrr_rls2402.py with 89% similarity]
csit.infra.etl/coverage_ndrpdr_rls2406.py [moved from csit.infra.etl/coverage_ndrpdr_rls2402.py with 89% similarity]
csit.infra.etl/coverage_reconf_rls2406.py [moved from csit.infra.etl/coverage_reconf_rls2402.py with 89% similarity]
csit.infra.etl/coverage_soak_rls2406.py [moved from csit.infra.etl/coverage_soak_rls2402.py with 89% similarity]
csit.infra.etl/iterative_hoststack_rls2406.py [moved from csit.infra.etl/iterative_hoststack_rls2402.py with 89% similarity]
csit.infra.etl/iterative_mrr_rls2406.py [moved from csit.infra.etl/iterative_mrr_rls2402.py with 89% similarity]
csit.infra.etl/iterative_ndrpdr_rls2406.py [moved from csit.infra.etl/iterative_ndrpdr_rls2402.py with 89% similarity]
csit.infra.etl/iterative_reconf_rls2406.py [moved from csit.infra.etl/iterative_reconf_rls2402.py with 89% similarity]
csit.infra.etl/iterative_soak_rls2406.py [moved from csit.infra.etl/iterative_soak_rls2402.py with 89% similarity]
csit.infra.etl/stats.py
csit.infra.etl/trending_hoststack.py
csit.infra.etl/trending_mrr.py
csit.infra.etl/trending_ndrpdr.py
csit.infra.etl/trending_reconf.py
csit.infra.etl/trending_soak.py
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-device-rls2402.hcl.tftpl [deleted file]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-device-rls2406.hcl.tftpl [moved from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-ndrpdr-rls2402.hcl.tftpl with 97% similarity]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-hoststack-rls2402.hcl.tftpl [deleted file]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-hoststack-rls2406.hcl.tftpl [new file with mode: 0644]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-mrr-rls2402.hcl.tftpl [deleted file]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-mrr-rls2406.hcl.tftpl [moved from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-mrr-rls2402.hcl.tftpl with 97% similarity]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-ndrpdr-rls2402.hcl.tftpl [deleted file]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-ndrpdr-rls2406.hcl.tftpl [moved from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-reconf-rls2402.hcl.tftpl with 97% similarity]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-reconf-rls2402.hcl.tftpl [deleted file]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-reconf-rls2406.hcl.tftpl [new file with mode: 0644]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-soak-rls2406.hcl.tftpl [moved from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-soak-rls2402.hcl.tftpl with 97% similarity]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-hoststack-rls2406.hcl.tftpl [moved from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-hoststack-rls2402.hcl.tftpl with 97% similarity]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-mrr-rls2406.hcl.tftpl [moved from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-soak-rls2402.hcl.tftpl with 97% similarity]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-ndrpdr-rls2406.hcl.tftpl [new file with mode: 0644]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-reconf-rls2406.hcl.tftpl [new file with mode: 0644]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-soak-rls2406.hcl.tftpl [new file with mode: 0644]
fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/main.tf
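
The Python hunks below repeat the same pattern in each ETL script: the S3 bucket names become environment-overridable (with the previous hard-coded values as defaults), and the boto3 session for the Parquet write is built once up front, dropping back to the default credential chain when the OUT_AWS_* variables are not set. A minimal standalone sketch of that pattern follows; imports mirror the ETL scripts (awswrangler 2.x naming), while the sample DataFrame is an illustrative stand-in for the real Spark output.

#!/usr/bin/env python3

"""Standalone sketch of the session fallback used by the rls2406 ETL scripts."""

from os import environ

import awswrangler as wr
import pandas as pd
from awswrangler.exceptions import EmptyDataFrame
from boto3 import session

# Bucket name is environment-overridable, with the previous value as default.
S3_DOCS_BUCKET = environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index")

try:
    # Prefer the dedicated output credentials injected by the Nomad job ...
    boto3_session = session.Session(
        aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
        aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
        region_name=environ["OUT_AWS_DEFAULT_REGION"]
    )
except KeyError:
    # ... otherwise fall back to the default boto3 credential chain.
    boto3_session = session.Session()

# Illustrative stand-in for out_sdf.toPandas() in the real scripts.
example_df = pd.DataFrame(
    {"test_type": ["mrr"], "year": [2024], "month": [6], "day": [1]}
)

try:
    wr.s3.to_parquet(
        df=example_df,
        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2406",
        dataset=True,
        partition_cols=["test_type", "year", "month", "day"],
        compression="snappy",
        use_threads=True,
        mode="overwrite_partitions",
        boto3_session=boto3_session
    )
except EmptyDataFrame:
    pass

Running the sketch would write a small partition under csit/parquet/coverage_rls2406 in the configured bucket, so treat it as an illustration of the credential fallback rather than something to execute against production data.
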

similarity index 89%
rename from csit.infra.etl/coverage_device_rls2402.py
rename to csit.infra.etl/coverage_device_rls2406.py
index 2db8081..04f4135 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2023 Cisco and/or its affiliates.
+# Copyright (c) 2024 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -29,8 +29,8 @@ from pyspark.sql.functions import col, lit, regexp_replace
 from pyspark.sql.types import StructType
 
 
-S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index"
-S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index"
+S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index")
+S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index")
 PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-vpp-device-*"
 SUFFIX="info.json.gz"
 IGNORE_SUFFIX=[
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-coverage-2402" in path]
+filtered_paths = [path for path in paths if "report-coverage-2406" in path]
 
 out_sdf = process_json_to_dataframe("device", filtered_paths)
 out_sdf.printSchema()
@@ -151,20 +151,25 @@ out_sdf = out_sdf \
     .withColumn("day", lit(datetime.now().day)) \
     .repartition(1)
 
+try:
+    boto3_session = session.Session(
+        aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
+        aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
+        region_name=environ["OUT_AWS_DEFAULT_REGION"]
+    )
+except KeyError:
+    boto3_session = session.Session()
+
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2402",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2406",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
         use_threads=True,
         mode="overwrite_partitions",
-        boto3_session=session.Session(
-            aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
-            aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
-            region_name=environ["OUT_AWS_DEFAULT_REGION"]
-        )
+        boto3_session=boto3_session
     )
 except EmptyDataFrame:
     pass
similarity index 89%
rename from csit.infra.etl/coverage_hoststack_rls2402.py
rename to csit.infra.etl/coverage_hoststack_rls2406.py
index 27eb9e8..75edd98 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2023 Cisco and/or its affiliates.
+# Copyright (c) 2024 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -29,8 +29,8 @@ from pyspark.sql.functions import col, lit, regexp_replace
 from pyspark.sql.types import StructType
 
 
-S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index"
-S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index"
+S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index")
+S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index")
 PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*"
 SUFFIX="info.json.gz"
 IGNORE_SUFFIX=[
@@ -141,10 +141,9 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-coverage-2402" in path]
+filtered_paths = [path for path in paths if "report-coverage-2406" in path]
 
 out_sdf = process_json_to_dataframe("hoststack", filtered_paths)
-out_sdf.show(truncate=False)
 out_sdf.printSchema()
 out_sdf = out_sdf \
     .withColumn("year", lit(datetime.now().year)) \
@@ -152,20 +151,25 @@ out_sdf = out_sdf \
     .withColumn("day", lit(datetime.now().day)) \
     .repartition(1)
 
+try:
+    boto3_session = session.Session(
+        aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
+        aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
+        region_name=environ["OUT_AWS_DEFAULT_REGION"]
+    )
+except KeyError:
+    boto3_session = session.Session()
+
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2402",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2406",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
         use_threads=True,
         mode="overwrite_partitions",
-        boto3_session=session.Session(
-            aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
-            aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
-            region_name=environ["OUT_AWS_DEFAULT_REGION"]
-        )
+        boto3_session=boto3_session
     )
 except EmptyDataFrame:
     pass
similarity index 89%
rename from csit.infra.etl/coverage_mrr_rls2402.py
rename to csit.infra.etl/coverage_mrr_rls2406.py
index e68e4f0..b84c077 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2023 Cisco and/or its affiliates.
+# Copyright (c) 2024 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -29,8 +29,8 @@ from pyspark.sql.functions import col, lit, regexp_replace
 from pyspark.sql.types import StructType
 
 
-S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index"
-S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index"
+S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index")
+S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index")
 PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*"
 SUFFIX="info.json.gz"
 IGNORE_SUFFIX=[
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-coverage-2402" in path]
+filtered_paths = [path for path in paths if "report-coverage-2406" in path]
 
 out_sdf = process_json_to_dataframe("mrr", filtered_paths)
 out_sdf.printSchema()
@@ -151,20 +151,25 @@ out_sdf = out_sdf \
     .withColumn("day", lit(datetime.now().day)) \
     .repartition(1)
 
+try:
+    boto3_session = session.Session(
+        aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
+        aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
+        region_name=environ["OUT_AWS_DEFAULT_REGION"]
+    )
+except KeyError:
+    boto3_session = session.Session()
+
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2402",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2406",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
         use_threads=True,
         mode="overwrite_partitions",
-        boto3_session=session.Session(
-            aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
-            aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
-            region_name=environ["OUT_AWS_DEFAULT_REGION"]
-        )
+        boto3_session=boto3_session
     )
 except EmptyDataFrame:
     pass
similarity index 89%
rename from csit.infra.etl/coverage_ndrpdr_rls2402.py
rename to csit.infra.etl/coverage_ndrpdr_rls2406.py
index 730e3ea..ee0f878 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2023 Cisco and/or its affiliates.
+# Copyright (c) 2024 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -29,8 +29,8 @@ from pyspark.sql.functions import col, lit, regexp_replace
 from pyspark.sql.types import StructType
 
 
-S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index"
-S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index"
+S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index")
+S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index")
 PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*"
 SUFFIX="info.json.gz"
 IGNORE_SUFFIX=[
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-coverage-2402" in path]
+filtered_paths = [path for path in paths if "report-coverage-2406" in path]
 
 out_sdf = process_json_to_dataframe("ndrpdr", filtered_paths)
 out_sdf.printSchema()
@@ -151,20 +151,25 @@ out_sdf = out_sdf \
     .withColumn("day", lit(datetime.now().day)) \
     .repartition(1)
 
+try:
+    boto3_session = session.Session(
+        aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
+        aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
+        region_name=environ["OUT_AWS_DEFAULT_REGION"]
+    )
+except KeyError:
+    boto3_session = session.Session()
+
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2402",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2406",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
         use_threads=True,
         mode="overwrite_partitions",
-        boto3_session=session.Session(
-            aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
-            aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
-            region_name=environ["OUT_AWS_DEFAULT_REGION"]
-        )
+        boto3_session=boto3_session
     )
 except EmptyDataFrame:
     pass
similarity index 89%
rename from csit.infra.etl/coverage_reconf_rls2402.py
rename to csit.infra.etl/coverage_reconf_rls2406.py
index dc1f647..33dbac7 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2023 Cisco and/or its affiliates.
+# Copyright (c) 2024 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -29,8 +29,8 @@ from pyspark.sql.functions import col, lit, regexp_replace
 from pyspark.sql.types import StructType
 
 
-S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index"
-S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index"
+S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index")
+S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index")
 PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*"
 SUFFIX="info.json.gz"
 IGNORE_SUFFIX=[
@@ -141,10 +141,9 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-coverage-2402" in path]
+filtered_paths = [path for path in paths if "report-coverage-2406" in path]
 
 out_sdf = process_json_to_dataframe("reconf", filtered_paths)
-out_sdf.show(truncate=False)
 out_sdf.printSchema()
 out_sdf = out_sdf \
     .withColumn("year", lit(datetime.now().year)) \
@@ -152,20 +151,25 @@ out_sdf = out_sdf \
     .withColumn("day", lit(datetime.now().day)) \
     .repartition(1)
 
+try:
+    boto3_session = session.Session(
+        aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
+        aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
+        region_name=environ["OUT_AWS_DEFAULT_REGION"]
+    )
+except KeyError:
+    boto3_session = session.Session()
+
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2402",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2406",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
         use_threads=True,
         mode="overwrite_partitions",
-        boto3_session=session.Session(
-            aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
-            aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
-            region_name=environ["OUT_AWS_DEFAULT_REGION"]
-        )
+        boto3_session=boto3_session
     )
 except EmptyDataFrame:
     pass
similarity index 89%
rename from csit.infra.etl/coverage_soak_rls2402.py
rename to csit.infra.etl/coverage_soak_rls2406.py
index 7d87afd..3b13c16 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2023 Cisco and/or its affiliates.
+# Copyright (c) 2024 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -29,8 +29,8 @@ from pyspark.sql.functions import col, lit, regexp_replace
 from pyspark.sql.types import StructType
 
 
-S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index"
-S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index"
+S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index")
+S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index")
 PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*"
 SUFFIX="info.json.gz"
 IGNORE_SUFFIX=[
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-coverage-2402" in path]
+filtered_paths = [path for path in paths if "report-coverage-2406" in path]
 
 out_sdf = process_json_to_dataframe("soak", filtered_paths)
 out_sdf.printSchema()
@@ -151,20 +151,25 @@ out_sdf = out_sdf \
     .withColumn("day", lit(datetime.now().day)) \
     .repartition(1)
 
+try:
+    boto3_session = session.Session(
+        aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
+        aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
+        region_name=environ["OUT_AWS_DEFAULT_REGION"]
+    )
+except KeyError:
+    boto3_session = session.Session()
+
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2402",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2406",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
         use_threads=True,
         mode="overwrite_partitions",
-        boto3_session=session.Session(
-            aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
-            aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
-            region_name=environ["OUT_AWS_DEFAULT_REGION"]
-        )
+        boto3_session=boto3_session
     )
 except EmptyDataFrame:
     pass
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2023 Cisco and/or its affiliates.
+# Copyright (c) 2024 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -29,8 +29,8 @@ from pyspark.sql.functions import col, lit, regexp_replace
 from pyspark.sql.types import StructType
 
 
-S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index"
-S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index"
+S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index")
+S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index")
 PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*"
 SUFFIX="info.json.gz"
 IGNORE_SUFFIX=[
@@ -141,10 +141,9 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-iterative-2402" in path]
+filtered_paths = [path for path in paths if "report-iterative-2406" in path]
 
 out_sdf = process_json_to_dataframe("hoststack", filtered_paths)
-out_sdf.show(truncate=False)
 out_sdf.printSchema()
 out_sdf = out_sdf \
     .withColumn("year", lit(datetime.now().year)) \
@@ -152,20 +151,25 @@ out_sdf = out_sdf \
     .withColumn("day", lit(datetime.now().day)) \
     .repartition(1)
 
+try:
+    boto3_session = session.Session(
+        aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
+        aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
+        region_name=environ["OUT_AWS_DEFAULT_REGION"]
+    )
+except KeyError:
+    boto3_session = session.Session()
+
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2402",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2406",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
         use_threads=True,
         mode="overwrite_partitions",
-        boto3_session=session.Session(
-            aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
-            aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
-            region_name=environ["OUT_AWS_DEFAULT_REGION"]
-        )
+        boto3_session=boto3_session
     )
 except EmptyDataFrame:
     pass
similarity index 89%
rename from csit.infra.etl/iterative_mrr_rls2402.py
rename to csit.infra.etl/iterative_mrr_rls2406.py
index e779dbd..9abb343 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2023 Cisco and/or its affiliates.
+# Copyright (c) 2024 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -29,8 +29,8 @@ from pyspark.sql.functions import col, lit, regexp_replace
 from pyspark.sql.types import StructType
 
 
-S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index"
-S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index"
+S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index")
+S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index")
 PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*"
 SUFFIX="info.json.gz"
 IGNORE_SUFFIX=[
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-iterative-2402" in path]
+filtered_paths = [path for path in paths if "report-iterative-2406" in path]
 
 out_sdf = process_json_to_dataframe("mrr", filtered_paths)
 out_sdf.printSchema()
@@ -151,20 +151,25 @@ out_sdf = out_sdf \
     .withColumn("day", lit(datetime.now().day)) \
     .repartition(1)
 
+try:
+    boto3_session = session.Session(
+        aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
+        aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
+        region_name=environ["OUT_AWS_DEFAULT_REGION"]
+    )
+except KeyError:
+    boto3_session = session.Session()
+
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2402",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2406",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
         use_threads=True,
         mode="overwrite_partitions",
-        boto3_session=session.Session(
-            aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
-            aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
-            region_name=environ["OUT_AWS_DEFAULT_REGION"]
-        )
+        boto3_session=boto3_session
     )
 except EmptyDataFrame:
     pass
similarity index 89%
rename from csit.infra.etl/iterative_ndrpdr_rls2402.py
rename to csit.infra.etl/iterative_ndrpdr_rls2406.py
index 9231176..21a6c46 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2023 Cisco and/or its affiliates.
+# Copyright (c) 2024 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -29,8 +29,8 @@ from pyspark.sql.functions import col, lit, regexp_replace
 from pyspark.sql.types import StructType
 
 
-S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index"
-S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index"
+S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index")
+S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index")
 PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*"
 SUFFIX="info.json.gz"
 IGNORE_SUFFIX=[
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-iterative-2402" in path]
+filtered_paths = [path for path in paths if "report-iterative-2406" in path]
 
 out_sdf = process_json_to_dataframe("ndrpdr", filtered_paths)
 out_sdf.printSchema()
@@ -151,20 +151,25 @@ out_sdf = out_sdf \
     .withColumn("day", lit(datetime.now().day)) \
     .repartition(1)
 
+try:
+    boto3_session = session.Session(
+        aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
+        aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
+        region_name=environ["OUT_AWS_DEFAULT_REGION"]
+    )
+except KeyError:
+    boto3_session = session.Session()
+
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2402",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2406",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
         use_threads=True,
         mode="overwrite_partitions",
-        boto3_session=session.Session(
-            aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
-            aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
-            region_name=environ["OUT_AWS_DEFAULT_REGION"]
-        )
+        boto3_session=boto3_session
     )
 except EmptyDataFrame:
     pass
similarity index 89%
rename from csit.infra.etl/iterative_reconf_rls2402.py
rename to csit.infra.etl/iterative_reconf_rls2406.py
index 1beeb16..e9b0681 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2023 Cisco and/or its affiliates.
+# Copyright (c) 2024 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -29,8 +29,8 @@ from pyspark.sql.functions import col, lit, regexp_replace
 from pyspark.sql.types import StructType
 
 
-S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index"
-S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index"
+S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index")
+S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index")
 PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*"
 SUFFIX="info.json.gz"
 IGNORE_SUFFIX=[
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-iterative-2402" in path]
+filtered_paths = [path for path in paths if "report-iterative-2406" in path]
 
 out_sdf = process_json_to_dataframe("reconf", filtered_paths)
 out_sdf.show(truncate=False)
@@ -152,20 +152,25 @@ out_sdf = out_sdf \
     .withColumn("day", lit(datetime.now().day)) \
     .repartition(1)
 
+try:
+    boto3_session = session.Session(
+        aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
+        aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
+        region_name=environ["OUT_AWS_DEFAULT_REGION"]
+    )
+except KeyError:
+    boto3_session = session.Session()
+
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2402",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2406",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
         use_threads=True,
         mode="overwrite_partitions",
-        boto3_session=session.Session(
-            aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
-            aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
-            region_name=environ["OUT_AWS_DEFAULT_REGION"]
-        )
+        boto3_session=boto3_session
     )
 except EmptyDataFrame:
     pass
similarity index 89%
rename from csit.infra.etl/iterative_soak_rls2402.py
rename to csit.infra.etl/iterative_soak_rls2406.py
index 55c6eb4..6b05e30 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2023 Cisco and/or its affiliates.
+# Copyright (c) 2024 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -29,8 +29,8 @@ from pyspark.sql.functions import col, lit, regexp_replace
 from pyspark.sql.types import StructType
 
 
-S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index"
-S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index"
+S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index")
+S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index")
 PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*"
 SUFFIX="info.json.gz"
 IGNORE_SUFFIX=[
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-iterative-2402" in path]
+filtered_paths = [path for path in paths if "report-iterative-2406" in path]
 
 out_sdf = process_json_to_dataframe("soak", filtered_paths)
 out_sdf.printSchema()
@@ -151,20 +151,25 @@ out_sdf = out_sdf \
     .withColumn("day", lit(datetime.now().day)) \
     .repartition(1)
 
+try:
+    boto3_session = session.Session(
+        aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
+        aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
+        region_name=environ["OUT_AWS_DEFAULT_REGION"]
+    )
+except KeyError:
+    boto3_session = session.Session()
+
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2402",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2406",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
         use_threads=True,
         mode="overwrite_partitions",
-        boto3_session=session.Session(
-            aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
-            aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
-            region_name=environ["OUT_AWS_DEFAULT_REGION"]
-        )
+        boto3_session=boto3_session
     )
 except EmptyDataFrame:
     pass
index 5d44caa..08ce4a9 100644 (file)
@@ -28,8 +28,9 @@ from pyspark.context import SparkContext
 from pyspark.sql.functions import lit
 from pyspark.sql.types import StructType
 
-S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index"
-S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index"
+
+S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index")
+S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index")
 PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*"
 SUFFIX="suite.info.json.gz"
 IGNORE_SUFFIX=[]
@@ -106,7 +107,6 @@ paths = wr.s3.list_objects(
 
 for schema_name in ["sra"]:
     out_sdf = process_json_to_dataframe(schema_name, paths)
-    out_sdf.show(truncate=False)
     out_sdf.printSchema()
     out_sdf = out_sdf \
         .withColumn("year", lit(datetime.now().year)) \
@@ -114,6 +114,15 @@ for schema_name in ["sra"]:
         .withColumn("day", lit(datetime.now().day)) \
         .repartition(1)
 
+    try:
+        boto3_session = session.Session(
+            aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
+            aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
+            region_name=environ["OUT_AWS_DEFAULT_REGION"]
+        )
+    except KeyError:
+        boto3_session = session.Session()
+
     try:
         wr.s3.to_parquet(
             df=out_sdf.toPandas(),
@@ -123,11 +133,7 @@ for schema_name in ["sra"]:
             compression="snappy",
             use_threads=True,
             mode="overwrite_partitions",
-            boto3_session=session.Session(
-                aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
-                aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
-                region_name=environ["OUT_AWS_DEFAULT_REGION"]
-            )
+            boto3_session=boto3_session
         )
     except EmptyDataFrame:
         pass
index 85cab5a..45cb5c9 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2023 Cisco and/or its affiliates.
+# Copyright (c) 2024 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -29,8 +29,8 @@ from pyspark.sql.functions import col, lit, regexp_replace
 from pyspark.sql.types import StructType
 
 
-S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index"
-S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index"
+S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index")
+S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index")
 PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*"
 SUFFIX="info.json.gz"
 IGNORE_SUFFIX=[
@@ -144,7 +144,6 @@ paths = wr.s3.list_objects(
 filtered_paths = [path for path in paths if "daily" in path or "weekly" in path]
 
 out_sdf = process_json_to_dataframe("hoststack", filtered_paths)
-out_sdf.show(truncate=False)
 out_sdf.printSchema()
 out_sdf = out_sdf \
     .withColumn("year", lit(datetime.now().year)) \
@@ -152,6 +151,15 @@ out_sdf = out_sdf \
     .withColumn("day", lit(datetime.now().day)) \
     .repartition(1)
 
+try:
+    boto3_session = session.Session(
+        aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
+        aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
+        region_name=environ["OUT_AWS_DEFAULT_REGION"]
+    )
+except KeyError:
+    boto3_session = session.Session()
+
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
@@ -161,11 +170,7 @@ try:
         compression="snappy",
         use_threads=True,
         mode="overwrite_partitions",
-        boto3_session=session.Session(
-            aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
-            aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
-            region_name=environ["OUT_AWS_DEFAULT_REGION"]
-        )
+        boto3_session=boto3_session
     )
 except EmptyDataFrame:
     pass
index a00c5fb..b42aaca 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2023 Cisco and/or its affiliates.
+# Copyright (c) 2024 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -29,8 +29,8 @@ from pyspark.sql.functions import col, lit, regexp_replace
 from pyspark.sql.types import StructType
 
 
-S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index"
-S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index"
+S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index")
+S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index")
 PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*"
 SUFFIX="info.json.gz"
 IGNORE_SUFFIX=[
@@ -152,6 +152,15 @@ out_sdf = out_sdf \
     .withColumn("day", lit(datetime.now().day)) \
     .repartition(1)
 
+try:
+    boto3_session = session.Session(
+        aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
+        aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
+        region_name=environ["OUT_AWS_DEFAULT_REGION"]
+    )
+except KeyError:
+    boto3_session = session.Session()
+
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
@@ -161,11 +171,7 @@ try:
         compression="snappy",
         use_threads=True,
         mode="overwrite_partitions",
-        boto3_session=session.Session(
-            aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
-            aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
-            region_name=environ["OUT_AWS_DEFAULT_REGION"]
-        )
+        boto3_session=boto3_session
     )
 except EmptyDataFrame:
     pass
index e35d27b..96582f5 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2023 Cisco and/or its affiliates.
+# Copyright (c) 2024 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -29,8 +29,8 @@ from pyspark.sql.functions import col, lit, regexp_replace
 from pyspark.sql.types import StructType
 
 
-S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index"
-S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index"
+S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index")
+S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index")
 PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*"
 SUFFIX="info.json.gz"
 IGNORE_SUFFIX=[
@@ -152,6 +152,15 @@ out_sdf = out_sdf \
     .withColumn("day", lit(datetime.now().day)) \
     .repartition(1)
 
+try:
+    boto3_session = session.Session(
+        aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
+        aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
+        region_name=environ["OUT_AWS_DEFAULT_REGION"]
+    )
+except KeyError:
+    boto3_session = session.Session()
+
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
@@ -161,11 +171,7 @@ try:
         compression="snappy",
         use_threads=True,
         mode="overwrite_partitions",
-        boto3_session=session.Session(
-            aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
-            aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
-            region_name=environ["OUT_AWS_DEFAULT_REGION"]
-        )
+        boto3_session=boto3_session
     )
 except EmptyDataFrame:
     pass
index 94e6199..08287a7 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2023 Cisco and/or its affiliates.
+# Copyright (c) 2024 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -29,8 +29,8 @@ from pyspark.sql.functions import col, lit, regexp_replace
 from pyspark.sql.types import StructType
 
 
-S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index"
-S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index"
+S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index")
+S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index")
 PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*"
 SUFFIX="info.json.gz"
 IGNORE_SUFFIX=[
@@ -152,6 +152,15 @@ out_sdf = out_sdf \
     .withColumn("day", lit(datetime.now().day)) \
     .repartition(1)
 
+try:
+    boto3_session = session.Session(
+        aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
+        aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
+        region_name=environ["OUT_AWS_DEFAULT_REGION"]
+    )
+except KeyError:
+    boto3_session = session.Session()
+
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
@@ -161,11 +171,7 @@ try:
         compression="snappy",
         use_threads=True,
         mode="overwrite_partitions",
-        boto3_session=session.Session(
-            aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
-            aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
-            region_name=environ["OUT_AWS_DEFAULT_REGION"]
-        )
+        boto3_session=boto3_session
     )
 except EmptyDataFrame:
     pass
index 40da521..e6faf5b 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2023 Cisco and/or its affiliates.
+# Copyright (c) 2024 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -29,8 +29,8 @@ from pyspark.sql.functions import col, lit, regexp_replace
 from pyspark.sql.types import StructType
 
 
-S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index"
-S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index"
+S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index")
+S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index")
 PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*"
 SUFFIX="info.json.gz"
 IGNORE_SUFFIX=[
@@ -144,7 +144,6 @@ paths = wr.s3.list_objects(
 filtered_paths = [path for path in paths if "daily" in path or "weekly" in path]
 
 out_sdf = process_json_to_dataframe("soak", filtered_paths)
-out_sdf.show(truncate=False)
 out_sdf.printSchema()
 out_sdf = out_sdf \
     .withColumn("year", lit(datetime.now().year)) \
@@ -152,6 +151,15 @@ out_sdf = out_sdf \
     .withColumn("day", lit(datetime.now().day)) \
     .repartition(1)
 
+try:
+    boto3_session = session.Session(
+        aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
+        aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
+        region_name=environ["OUT_AWS_DEFAULT_REGION"]
+    )
+except KeyError:
+    boto3_session = session.Session()
+
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
@@ -161,11 +170,7 @@ try:
         compression="snappy",
         use_threads=True,
         mode="overwrite_partitions",
-        boto3_session=session.Session(
-            aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
-            aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
-            region_name=environ["OUT_AWS_DEFAULT_REGION"]
-        )
+        boto3_session=boto3_session
     )
 except EmptyDataFrame:
     pass
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-device-rls2402.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-device-rls2402.hcl.tftpl
deleted file mode 100644 (file)
index cc0b1df..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-job "${job_name}" {
-  datacenters = "${datacenters}"
-  type        = "${type}"
-  periodic {
-    cron             = "${cron}"
-    prohibit_overlap = "${prohibit_overlap}"
-    time_zone        = "${time_zone}"
-  }
-  group "${job_name}" {
-    restart {
-      mode = "fail"
-    }
-    constraint {
-      attribute = "$${attr.cpu.arch}"
-      operator  = "!="
-      value     = "arm64"
-    }
-    constraint {
-      attribute = "$${node.class}"
-      value     = "builder"
-    }
-    task "${job_name}" {
-      artifact {
-        source      = "git::https://github.com/FDio/csit"
-        destination = "local/csit"
-      }
-      driver = "docker"
-      config {
-        image   = "${image}"
-        command = "gluesparksubmit"
-        args = [
-          "--driver-memory", "20g",
-          "--executor-memory", "20g",
-          "--executor-cores", "2",
-          "--master", "local[2]",
-          "coverage_device_rls2402.py"
-        ]
-        work_dir = "/local/csit/csit.infra.etl"
-      }
-      env {
-        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
-        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
-        AWS_DEFAULT_REGION        = "${aws_default_region}"
-        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
-        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
-        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
-        ${ envs }
-      }
-      resources {
-        cpu    = ${cpu}
-        memory = ${memory}
-      }
-    }
-  }
-}
\ No newline at end of file
similarity index 97%
rename from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-ndrpdr-rls2402.hcl.tftpl
rename to fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-device-rls2406.hcl.tftpl
@@ -33,7 +33,7 @@ job "${job_name}" {
           "--executor-memory", "20g",
           "--executor-cores", "2",
           "--master", "local[2]",
-          "iterative_ndrpdr_rls2402.py"
+          "coverage_device_rls2406.py"
         ]
         work_dir = "/local/csit/csit.infra.etl"
       }
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-hoststack-rls2402.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-hoststack-rls2402.hcl.tftpl
deleted file mode 100644 (file)
index 95d7a4c..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-job "${job_name}" {
-  datacenters = "${datacenters}"
-  type        = "${type}"
-  periodic {
-    cron             = "${cron}"
-    prohibit_overlap = "${prohibit_overlap}"
-    time_zone        = "${time_zone}"
-  }
-  group "${job_name}" {
-    restart {
-      mode = "fail"
-    }
-    constraint {
-      attribute = "$${attr.cpu.arch}"
-      operator  = "!="
-      value     = "arm64"
-    }
-    constraint {
-      attribute = "$${node.class}"
-      value     = "builder"
-    }
-    task "${job_name}" {
-      artifact {
-        source      = "git::https://github.com/FDio/csit"
-        destination = "local/csit"
-      }
-      driver = "docker"
-      config {
-        image   = "${image}"
-        command = "gluesparksubmit"
-        args = [
-          "--driver-memory", "20g",
-          "--executor-memory", "20g",
-          "--executor-cores", "2",
-          "--master", "local[2]",
-          "coverage_hoststack_rls2402.py"
-        ]
-        work_dir = "/local/csit/csit.infra.etl"
-      }
-      env {
-        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
-        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
-        AWS_DEFAULT_REGION        = "${aws_default_region}"
-        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
-        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
-        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
-        ${ envs }
-      }
-      resources {
-        cpu    = ${cpu}
-        memory = ${memory}
-      }
-    }
-  }
-}
\ No newline at end of file
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-hoststack-rls2406.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-hoststack-rls2406.hcl.tftpl
new file mode 100644 (file)
index 0000000..babde4f
--- /dev/null
@@ -0,0 +1,55 @@
+job "${job_name}" {
+  datacenters = "${datacenters}"
+  type        = "${type}"
+  periodic {
+    cron             = "${cron}"
+    prohibit_overlap = "${prohibit_overlap}"
+    time_zone        = "${time_zone}"
+  }
+  group "${job_name}" {
+    restart {
+      mode = "fail"
+    }
+    constraint {
+      attribute = "$${attr.cpu.arch}"
+      operator  = "!="
+      value     = "arm64"
+    }
+    constraint {
+      attribute = "$${node.class}"
+      value     = "builder"
+    }
+    task "${job_name}" {
+      artifact {
+        source      = "git::https://github.com/FDio/csit"
+        destination = "local/csit"
+      }
+      driver = "docker"
+      config {
+        image   = "${image}"
+        command = "gluesparksubmit"
+        args = [
+          "--driver-memory", "20g",
+          "--executor-memory", "20g",
+          "--executor-cores", "2",
+          "--master", "local[2]",
+          "coverage_hoststack_rls2406.py"
+        ]
+        work_dir = "/local/csit/csit.infra.etl"
+      }
+      env {
+        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
+        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
+        AWS_DEFAULT_REGION        = "${aws_default_region}"
+        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
+        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
+        ${ envs }
+      }
+      resources {
+        cpu    = ${cpu}
+        memory = ${memory}
+      }
+    }
+  }
+}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-mrr-rls2402.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-mrr-rls2402.hcl.tftpl
deleted file mode 100644 (file)
index 3bab926..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-job "${job_name}" {
-  datacenters = "${datacenters}"
-  type        = "${type}"
-  periodic {
-    cron             = "${cron}"
-    prohibit_overlap = "${prohibit_overlap}"
-    time_zone        = "${time_zone}"
-  }
-  group "${job_name}" {
-    restart {
-      mode = "fail"
-    }
-    constraint {
-      attribute = "$${attr.cpu.arch}"
-      operator  = "!="
-      value     = "arm64"
-    }
-    constraint {
-      attribute = "$${node.class}"
-      value     = "builder"
-    }
-    task "${job_name}" {
-      artifact {
-        source      = "git::https://github.com/FDio/csit"
-        destination = "local/csit"
-      }
-      driver = "docker"
-      config {
-        image   = "${image}"
-        command = "gluesparksubmit"
-        args = [
-          "--driver-memory", "20g",
-          "--executor-memory", "20g",
-          "--executor-cores", "2",
-          "--master", "local[2]",
-          "coverage_mrr_rls2402.py"
-        ]
-        work_dir = "/local/csit/csit.infra.etl"
-      }
-      env {
-        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
-        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
-        AWS_DEFAULT_REGION        = "${aws_default_region}"
-        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
-        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
-        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
-        ${ envs }
-      }
-      resources {
-        cpu    = ${cpu}
-        memory = ${memory}
-      }
-    }
-  }
-}
\ No newline at end of file
similarity index 97%
rename from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-mrr-rls2402.hcl.tftpl
rename to fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-mrr-rls2406.hcl.tftpl
@@ -33,7 +33,7 @@ job "${job_name}" {
           "--executor-memory", "20g",
           "--executor-cores", "2",
           "--master", "local[2]",
-          "iterative_mrr_rls2402.py"
+          "coverage_mrr_rls2406.py"
         ]
         work_dir = "/local/csit/csit.infra.etl"
       }
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-ndrpdr-rls2402.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-ndrpdr-rls2402.hcl.tftpl
deleted file mode 100644 (file)
index 6142219..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-job "${job_name}" {
-  datacenters = "${datacenters}"
-  type        = "${type}"
-  periodic {
-    cron             = "${cron}"
-    prohibit_overlap = "${prohibit_overlap}"
-    time_zone        = "${time_zone}"
-  }
-  group "${job_name}" {
-    restart {
-      mode = "fail"
-    }
-    constraint {
-      attribute = "$${attr.cpu.arch}"
-      operator  = "!="
-      value     = "arm64"
-    }
-    constraint {
-      attribute = "$${node.class}"
-      value     = "builder"
-    }
-    task "${job_name}" {
-      artifact {
-        source      = "git::https://github.com/FDio/csit"
-        destination = "local/csit"
-      }
-      driver = "docker"
-      config {
-        image   = "${image}"
-        command = "gluesparksubmit"
-        args = [
-          "--driver-memory", "20g",
-          "--executor-memory", "20g",
-          "--executor-cores", "2",
-          "--master", "local[2]",
-          "coverage_ndrpdr_rls2402.py"
-        ]
-        work_dir = "/local/csit/csit.infra.etl"
-      }
-      env {
-        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
-        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
-        AWS_DEFAULT_REGION        = "${aws_default_region}"
-        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
-        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
-        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
-        ${ envs }
-      }
-      resources {
-        cpu    = ${cpu}
-        memory = ${memory}
-      }
-    }
-  }
-}
\ No newline at end of file
similarity index 97%
rename from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-reconf-rls2402.hcl.tftpl
rename to fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-ndrpdr-rls2406.hcl.tftpl
@@ -33,7 +33,7 @@ job "${job_name}" {
           "--executor-memory", "20g",
           "--executor-cores", "2",
           "--master", "local[2]",
-          "iterative_reconf_rls2402.py"
+          "coverage_ndrpdr_rls2406.py"
         ]
         work_dir = "/local/csit/csit.infra.etl"
       }
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-reconf-rls2402.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-reconf-rls2402.hcl.tftpl
deleted file mode 100644 (file)
index b474e75..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-job "${job_name}" {
-  datacenters = "${datacenters}"
-  type        = "${type}"
-  periodic {
-    cron             = "${cron}"
-    prohibit_overlap = "${prohibit_overlap}"
-    time_zone        = "${time_zone}"
-  }
-  group "${job_name}" {
-    restart {
-      mode = "fail"
-    }
-    constraint {
-      attribute = "$${attr.cpu.arch}"
-      operator  = "!="
-      value     = "arm64"
-    }
-    constraint {
-      attribute = "$${node.class}"
-      value     = "builder"
-    }
-    task "${job_name}" {
-      artifact {
-        source      = "git::https://github.com/FDio/csit"
-        destination = "local/csit"
-      }
-      driver = "docker"
-      config {
-        image   = "${image}"
-        command = "gluesparksubmit"
-        args = [
-          "--driver-memory", "20g",
-          "--executor-memory", "20g",
-          "--executor-cores", "2",
-          "--master", "local[2]",
-          "coverage_reconf_rls2402.py"
-        ]
-        work_dir = "/local/csit/csit.infra.etl"
-      }
-      env {
-        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
-        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
-        AWS_DEFAULT_REGION        = "${aws_default_region}"
-        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
-        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
-        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
-        ${ envs }
-      }
-      resources {
-        cpu    = ${cpu}
-        memory = ${memory}
-      }
-    }
-  }
-}
\ No newline at end of file
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-reconf-rls2406.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-reconf-rls2406.hcl.tftpl
new file mode 100644 (file)
index 0000000..f03beb4
--- /dev/null
@@ -0,0 +1,55 @@
+job "${job_name}" {
+  datacenters = "${datacenters}"
+  type        = "${type}"
+  periodic {
+    cron             = "${cron}"
+    prohibit_overlap = "${prohibit_overlap}"
+    time_zone        = "${time_zone}"
+  }
+  group "${job_name}" {
+    restart {
+      mode = "fail"
+    }
+    constraint {
+      attribute = "$${attr.cpu.arch}"
+      operator  = "!="
+      value     = "arm64"
+    }
+    constraint {
+      attribute = "$${node.class}"
+      value     = "builder"
+    }
+    task "${job_name}" {
+      artifact {
+        source      = "git::https://github.com/FDio/csit"
+        destination = "local/csit"
+      }
+      driver = "docker"
+      config {
+        image   = "${image}"
+        command = "gluesparksubmit"
+        args = [
+          "--driver-memory", "20g",
+          "--executor-memory", "20g",
+          "--executor-cores", "2",
+          "--master", "local[2]",
+          "coverage_reconf_rls2406.py"
+        ]
+        work_dir = "/local/csit/csit.infra.etl"
+      }
+      env {
+        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
+        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
+        AWS_DEFAULT_REGION        = "${aws_default_region}"
+        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
+        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
+        ${ envs }
+      }
+      resources {
+        cpu    = ${cpu}
+        memory = ${memory}
+      }
+    }
+  }
+}
similarity index 97%
rename from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-soak-rls2402.hcl.tftpl
rename to fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-soak-rls2406.hcl.tftpl
@@ -33,7 +33,7 @@ job "${job_name}" {
           "--executor-memory", "20g",
           "--executor-cores", "2",
           "--master", "local[2]",
-          "iterative_soak_rls2402.py"
+          "coverage_soak_rls2406.py"
         ]
         work_dir = "/local/csit/csit.infra.etl"
       }
similarity index 97%
rename from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-hoststack-rls2402.hcl.tftpl
rename to fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-hoststack-rls2406.hcl.tftpl
@@ -33,7 +33,7 @@ job "${job_name}" {
           "--executor-memory", "20g",
           "--executor-cores", "2",
           "--master", "local[2]",
-          "iterative_hoststack_rls2402.py"
+          "iterative_hoststack_rls2406.py"
         ]
         work_dir = "/local/csit/csit.infra.etl"
       }
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-soak-rls2402.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-mrr-rls2406.hcl.tftpl
@@ -33,7 +33,7 @@ job "${job_name}" {
           "--executor-memory", "20g",
           "--executor-cores", "2",
           "--master", "local[2]",
-          "coverage_soak_rls2402.py"
+          "iterative_mrr_rls2406.py"
         ]
         work_dir = "/local/csit/csit.infra.etl"
       }
@@ -52,4 +52,4 @@ job "${job_name}" {
       }
     }
   }
-}
\ No newline at end of file
+}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-ndrpdr-rls2406.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-ndrpdr-rls2406.hcl.tftpl
new file mode 100644 (file)
index 0000000..461dad8
--- /dev/null
@@ -0,0 +1,55 @@
+job "${job_name}" {
+  datacenters = "${datacenters}"
+  type        = "${type}"
+  periodic {
+    cron             = "${cron}"
+    prohibit_overlap = "${prohibit_overlap}"
+    time_zone        = "${time_zone}"
+  }
+  group "${job_name}" {
+    restart {
+      mode = "fail"
+    }
+    constraint {
+      attribute = "$${attr.cpu.arch}"
+      operator  = "!="
+      value     = "arm64"
+    }
+    constraint {
+      attribute = "$${node.class}"
+      value     = "builder"
+    }
+    task "${job_name}" {
+      artifact {
+        source      = "git::https://github.com/FDio/csit"
+        destination = "local/csit"
+      }
+      driver = "docker"
+      config {
+        image   = "${image}"
+        command = "gluesparksubmit"
+        args = [
+          "--driver-memory", "20g",
+          "--executor-memory", "20g",
+          "--executor-cores", "2",
+          "--master", "local[2]",
+          "iterative_ndrpdr_rls2406.py"
+        ]
+        work_dir = "/local/csit/csit.infra.etl"
+      }
+      env {
+        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
+        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
+        AWS_DEFAULT_REGION        = "${aws_default_region}"
+        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
+        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
+        ${ envs }
+      }
+      resources {
+        cpu    = ${cpu}
+        memory = ${memory}
+      }
+    }
+  }
+}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-reconf-rls2406.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-reconf-rls2406.hcl.tftpl
new file mode 100644 (file)
index 0000000..d7f5daa
--- /dev/null
@@ -0,0 +1,55 @@
+job "${job_name}" {
+  datacenters = "${datacenters}"
+  type        = "${type}"
+  periodic {
+    cron             = "${cron}"
+    prohibit_overlap = "${prohibit_overlap}"
+    time_zone        = "${time_zone}"
+  }
+  group "${job_name}" {
+    restart {
+      mode = "fail"
+    }
+    constraint {
+      attribute = "$${attr.cpu.arch}"
+      operator  = "!="
+      value     = "arm64"
+    }
+    constraint {
+      attribute = "$${node.class}"
+      value     = "builder"
+    }
+    task "${job_name}" {
+      artifact {
+        source      = "git::https://github.com/FDio/csit"
+        destination = "local/csit"
+      }
+      driver = "docker"
+      config {
+        image   = "${image}"
+        command = "gluesparksubmit"
+        args = [
+          "--driver-memory", "20g",
+          "--executor-memory", "20g",
+          "--executor-cores", "2",
+          "--master", "local[2]",
+          "iterative_reconf_rls2406.py"
+        ]
+        work_dir = "/local/csit/csit.infra.etl"
+      }
+      env {
+        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
+        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
+        AWS_DEFAULT_REGION        = "${aws_default_region}"
+        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
+        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
+        ${ envs }
+      }
+      resources {
+        cpu    = ${cpu}
+        memory = ${memory}
+      }
+    }
+  }
+}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-soak-rls2406.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-soak-rls2406.hcl.tftpl
new file mode 100644 (file)
index 0000000..3bf8505
--- /dev/null
@@ -0,0 +1,55 @@
+job "${job_name}" {
+  datacenters = "${datacenters}"
+  type        = "${type}"
+  periodic {
+    cron             = "${cron}"
+    prohibit_overlap = "${prohibit_overlap}"
+    time_zone        = "${time_zone}"
+  }
+  group "${job_name}" {
+    restart {
+      mode = "fail"
+    }
+    constraint {
+      attribute = "$${attr.cpu.arch}"
+      operator  = "!="
+      value     = "arm64"
+    }
+    constraint {
+      attribute = "$${node.class}"
+      value     = "builder"
+    }
+    task "${job_name}" {
+      artifact {
+        source      = "git::https://github.com/FDio/csit"
+        destination = "local/csit"
+      }
+      driver = "docker"
+      config {
+        image   = "${image}"
+        command = "gluesparksubmit"
+        args = [
+          "--driver-memory", "20g",
+          "--executor-memory", "20g",
+          "--executor-cores", "2",
+          "--master", "local[2]",
+          "iterative_soak_rls2406.py"
+        ]
+        work_dir = "/local/csit/csit.infra.etl"
+      }
+      env {
+        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
+        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
+        AWS_DEFAULT_REGION        = "${aws_default_region}"
+        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
+        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
+        ${ envs }
+      }
+      resources {
+        cpu    = ${cpu}
+        memory = ${memory}
+      }
+    }
+  }
+}
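
All of the rls2406 templates share the same periodic stanza, and the module invocations later in this change all pass the same seven-field cron expression. Nomad cron expressions with seven fields carry a leading seconds field, so "0 30 0 * * * *" fires once per day at 00:30:00 in the job's time zone. Rendered, the stanza should look roughly like the sketch below; the prohibit_overlap and time_zone values are assumed module defaults rather than values visible in this diff:

# Rendered periodic stanza (sketch)
periodic {
  # field order: second minute hour day-of-month month day-of-week year
  cron             = "0 30 0 * * * *"  # every day at 00:30:00
  prohibit_overlap = "true"            # assumed: skip a run while the previous one is still active
  time_zone        = "UTC"             # assumed default time zone
}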
index aac81d9..28234bf 100644 (file)
@@ -94,190 +94,190 @@ module "etl-trending-soak" {
   memory                    = 60000
 }
 
-#module "etl-iterative-hoststack-rls2402" {
-#  providers = {
-#    nomad = nomad.yul1
-#  }
-#  source = "../"
-#
-#  aws_access_key_id         = data.vault_generic_secret.fdio_logs.data["access_key"]
-#  aws_secret_access_key     = data.vault_generic_secret.fdio_logs.data["secret_key"]
-#  aws_default_region        = data.vault_generic_secret.fdio_logs.data["region"]
-#  out_aws_access_key_id     = data.vault_generic_secret.fdio_docs.data["access_key"]
-#  out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
-#  out_aws_default_region    = data.vault_generic_secret.fdio_docs.data["region"]
-#  cron                      = "0 30 0 * * * *"
-#  datacenters               = ["yul1"]
-#  job_name                  = "etl-iterative-hoststack-rls2402"
-#}
-#
-#module "etl-iterative-mrr-rls2402" {
-#  providers = {
-#    nomad = nomad.yul1
-#  }
-#  source = "../"
-#
-#  aws_access_key_id         = data.vault_generic_secret.fdio_logs.data["access_key"]
-#  aws_secret_access_key     = data.vault_generic_secret.fdio_logs.data["secret_key"]
-#  aws_default_region        = data.vault_generic_secret.fdio_logs.data["region"]
-#  out_aws_access_key_id     = data.vault_generic_secret.fdio_docs.data["access_key"]
-#  out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
-#  out_aws_default_region    = data.vault_generic_secret.fdio_docs.data["region"]
-#  cron                      = "0 30 0 * * * *"
-#  datacenters               = ["yul1"]
-#  job_name                  = "etl-iterative-mrr-rls2402"
-#}
-#
-#module "etl-iterative-ndrpdr-rls2402" {
-#  providers = {
-#    nomad = nomad.yul1
-#  }
-#  source = "../"
-#
-#  aws_access_key_id         = data.vault_generic_secret.fdio_logs.data["access_key"]
-#  aws_secret_access_key     = data.vault_generic_secret.fdio_logs.data["secret_key"]
-#  aws_default_region        = data.vault_generic_secret.fdio_logs.data["region"]
-#  out_aws_access_key_id     = data.vault_generic_secret.fdio_docs.data["access_key"]
-#  out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
-#  out_aws_default_region    = data.vault_generic_secret.fdio_docs.data["region"]
-#  cron                      = "0 30 0 * * * *"
-#  datacenters               = ["yul1"]
-#  job_name                  = "etl-iterative-ndrpdr-rls2402"
-#}
-#
-#module "etl-iterative-reconf-rls2402" {
-#  providers = {
-#    nomad = nomad.yul1
-#  }
-#  source = "../"
-#
-#  aws_access_key_id         = data.vault_generic_secret.fdio_logs.data["access_key"]
-#  aws_secret_access_key     = data.vault_generic_secret.fdio_logs.data["secret_key"]
-#  aws_default_region        = data.vault_generic_secret.fdio_logs.data["region"]
-#  out_aws_access_key_id     = data.vault_generic_secret.fdio_docs.data["access_key"]
-#  out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
-#  out_aws_default_region    = data.vault_generic_secret.fdio_docs.data["region"]
-#  cron                      = "0 30 0 * * * *"
-#  datacenters               = ["yul1"]
-#  job_name                  = "etl-iterative-reconf-rls2402"
-#}
-#
-#module "etl-iterative-soak-rls2402" {
-#  providers = {
-#    nomad = nomad.yul1
-#  }
-#  source = "../"
-#
-#  aws_access_key_id         = data.vault_generic_secret.fdio_logs.data["access_key"]
-#  aws_secret_access_key     = data.vault_generic_secret.fdio_logs.data["secret_key"]
-#  aws_default_region        = data.vault_generic_secret.fdio_logs.data["region"]
-#  out_aws_access_key_id     = data.vault_generic_secret.fdio_docs.data["access_key"]
-#  out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
-#  out_aws_default_region    = data.vault_generic_secret.fdio_docs.data["region"]
-#  cron                      = "0 30 0 * * * *"
-#  datacenters               = ["yul1"]
-#  job_name                  = "etl-iterative-soak-rls2402"
-#}
-#
-#module "etl-coverage-device-rls2402" {
-#  providers = {
-#    nomad = nomad.yul1
-#  }
-#  source = "../"
-#
-#  aws_access_key_id         = data.vault_generic_secret.fdio_logs.data["access_key"]
-#  aws_secret_access_key     = data.vault_generic_secret.fdio_logs.data["secret_key"]
-#  aws_default_region        = data.vault_generic_secret.fdio_logs.data["region"]
-#  out_aws_access_key_id     = data.vault_generic_secret.fdio_docs.data["access_key"]
-#  out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
-#  out_aws_default_region    = data.vault_generic_secret.fdio_docs.data["region"]
-#  cron                      = "0 30 0 * * * *"
-#  datacenters               = ["yul1"]
-#  job_name                  = "etl-coverage-device-rls2402"
-#}
-#
-#module "etl-coverage-hoststack-rls2402" {
-#  providers = {
-#    nomad = nomad.yul1
-#  }
-#  source = "../"
-#
-#  aws_access_key_id         = data.vault_generic_secret.fdio_logs.data["access_key"]
-#  aws_secret_access_key     = data.vault_generic_secret.fdio_logs.data["secret_key"]
-#  aws_default_region        = data.vault_generic_secret.fdio_logs.data["region"]
-#  out_aws_access_key_id     = data.vault_generic_secret.fdio_docs.data["access_key"]
-#  out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
-#  out_aws_default_region    = data.vault_generic_secret.fdio_docs.data["region"]
-#  cron                      = "0 30 0 * * * *"
-#  datacenters               = ["yul1"]
-#  job_name                  = "etl-coverage-hoststack-rls2402"
-#}
-#
-#module "etl-coverage-mrr-rls2402" {
-#  providers = {
-#    nomad = nomad.yul1
-#  }
-#  source = "../"
-#
-#  aws_access_key_id         = data.vault_generic_secret.fdio_logs.data["access_key"]
-#  aws_secret_access_key     = data.vault_generic_secret.fdio_logs.data["secret_key"]
-#  aws_default_region        = data.vault_generic_secret.fdio_logs.data["region"]
-#  out_aws_access_key_id     = data.vault_generic_secret.fdio_docs.data["access_key"]
-#  out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
-#  out_aws_default_region    = data.vault_generic_secret.fdio_docs.data["region"]
-#  cron                      = "0 30 0 * * * *"
-#  datacenters               = ["yul1"]
-#  job_name                  = "etl-coverage-mrr-rls2402"
-#}
-#
-#module "etl-coverage-ndrpdr-rls2402" {
-#  providers = {
-#    nomad = nomad.yul1
-#  }
-#  source = "../"
-#
-#  aws_access_key_id         = data.vault_generic_secret.fdio_logs.data["access_key"]
-#  aws_secret_access_key     = data.vault_generic_secret.fdio_logs.data["secret_key"]
-#  aws_default_region        = data.vault_generic_secret.fdio_logs.data["region"]
-#  out_aws_access_key_id     = data.vault_generic_secret.fdio_docs.data["access_key"]
-#  out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
-#  out_aws_default_region    = data.vault_generic_secret.fdio_docs.data["region"]
-#  cron                      = "0 30 0 * * * *"
-#  datacenters               = ["yul1"]
-#  job_name                  = "etl-coverage-ndrpdr-rls2402"
-#}
-#
-#module "etl-coverage-reconf-rls2402" {
-#  providers = {
-#    nomad = nomad.yul1
-#  }
-#  source = "../"
-#
-#  aws_access_key_id         = data.vault_generic_secret.fdio_logs.data["access_key"]
-#  aws_secret_access_key     = data.vault_generic_secret.fdio_logs.data["secret_key"]
-#  aws_default_region        = data.vault_generic_secret.fdio_logs.data["region"]
-#  out_aws_access_key_id     = data.vault_generic_secret.fdio_docs.data["access_key"]
-#  out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
-#  out_aws_default_region    = data.vault_generic_secret.fdio_docs.data["region"]
-#  cron                      = "0 30 0 * * * *"
-#  datacenters               = ["yul1"]
-#  job_name                  = "etl-coverage-reconf-rls2402"
-#}
-#
-#module "etl-coverage-soak-rls2402" {
-#  providers = {
-#    nomad = nomad.yul1
-#  }
-#  source = "../"
-#
-#  aws_access_key_id         = data.vault_generic_secret.fdio_logs.data["access_key"]
-#  aws_secret_access_key     = data.vault_generic_secret.fdio_logs.data["secret_key"]
-#  aws_default_region        = data.vault_generic_secret.fdio_logs.data["region"]
-#  out_aws_access_key_id     = data.vault_generic_secret.fdio_docs.data["access_key"]
-#  out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
-#  out_aws_default_region    = data.vault_generic_secret.fdio_docs.data["region"]
-#  cron                      = "0 30 0 * * * *"
-#  datacenters               = ["yul1"]
-#  job_name                  = "etl-coverage-soak-rls2402"
-#}
-#
\ No newline at end of file
+module "etl-iterative-hoststack-rls2406" {
+  providers = {
+    nomad = nomad.yul1
+  }
+  source = "../"
+
+  aws_access_key_id         = data.vault_generic_secret.fdio_logs.data["access_key"]
+  aws_secret_access_key     = data.vault_generic_secret.fdio_logs.data["secret_key"]
+  aws_default_region        = data.vault_generic_secret.fdio_logs.data["region"]
+  out_aws_access_key_id     = data.vault_generic_secret.fdio_docs.data["access_key"]
+  out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
+  out_aws_default_region    = data.vault_generic_secret.fdio_docs.data["region"]
+  cron                      = "0 30 0 * * * *"
+  datacenters               = ["yul1"]
+  job_name                  = "etl-iterative-hoststack-rls2406"
+}
+
+module "etl-iterative-mrr-rls2406" {
+  providers = {
+    nomad = nomad.yul1
+  }
+  source = "../"
+
+  aws_access_key_id         = data.vault_generic_secret.fdio_logs.data["access_key"]
+  aws_secret_access_key     = data.vault_generic_secret.fdio_logs.data["secret_key"]
+  aws_default_region        = data.vault_generic_secret.fdio_logs.data["region"]
+  out_aws_access_key_id     = data.vault_generic_secret.fdio_docs.data["access_key"]
+  out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
+  out_aws_default_region    = data.vault_generic_secret.fdio_docs.data["region"]
+  cron                      = "0 30 0 * * * *"
+  datacenters               = ["yul1"]
+  job_name                  = "etl-iterative-mrr-rls2406"
+}
+
+module "etl-iterative-ndrpdr-rls2406" {
+  providers = {
+    nomad = nomad.yul1
+  }
+  source = "../"
+
+  aws_access_key_id         = data.vault_generic_secret.fdio_logs.data["access_key"]
+  aws_secret_access_key     = data.vault_generic_secret.fdio_logs.data["secret_key"]
+  aws_default_region        = data.vault_generic_secret.fdio_logs.data["region"]
+  out_aws_access_key_id     = data.vault_generic_secret.fdio_docs.data["access_key"]
+  out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
+  out_aws_default_region    = data.vault_generic_secret.fdio_docs.data["region"]
+  cron                      = "0 30 0 * * * *"
+  datacenters               = ["yul1"]
+  job_name                  = "etl-iterative-ndrpdr-rls2406"
+}
+
+module "etl-iterative-reconf-rls2406" {
+  providers = {
+    nomad = nomad.yul1
+  }
+  source = "../"
+
+  aws_access_key_id         = data.vault_generic_secret.fdio_logs.data["access_key"]
+  aws_secret_access_key     = data.vault_generic_secret.fdio_logs.data["secret_key"]
+  aws_default_region        = data.vault_generic_secret.fdio_logs.data["region"]
+  out_aws_access_key_id     = data.vault_generic_secret.fdio_docs.data["access_key"]
+  out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
+  out_aws_default_region    = data.vault_generic_secret.fdio_docs.data["region"]
+  cron                      = "0 30 0 * * * *"
+  datacenters               = ["yul1"]
+  job_name                  = "etl-iterative-reconf-rls2406"
+}
+
+module "etl-iterative-soak-rls2406" {
+  providers = {
+    nomad = nomad.yul1
+  }
+  source = "../"
+
+  aws_access_key_id         = data.vault_generic_secret.fdio_logs.data["access_key"]
+  aws_secret_access_key     = data.vault_generic_secret.fdio_logs.data["secret_key"]
+  aws_default_region        = data.vault_generic_secret.fdio_logs.data["region"]
+  out_aws_access_key_id     = data.vault_generic_secret.fdio_docs.data["access_key"]
+  out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
+  out_aws_default_region    = data.vault_generic_secret.fdio_docs.data["region"]
+  cron                      = "0 30 0 * * * *"
+  datacenters               = ["yul1"]
+  job_name                  = "etl-iterative-soak-rls2406"
+}
+
+module "etl-coverage-device-rls2406" {
+  providers = {
+    nomad = nomad.yul1
+  }
+  source = "../"
+
+  aws_access_key_id         = data.vault_generic_secret.fdio_logs.data["access_key"]
+  aws_secret_access_key     = data.vault_generic_secret.fdio_logs.data["secret_key"]
+  aws_default_region        = data.vault_generic_secret.fdio_logs.data["region"]
+  out_aws_access_key_id     = data.vault_generic_secret.fdio_docs.data["access_key"]
+  out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
+  out_aws_default_region    = data.vault_generic_secret.fdio_docs.data["region"]
+  cron                      = "0 30 0 * * * *"
+  datacenters               = ["yul1"]
+  job_name                  = "etl-coverage-device-rls2406"
+}
+
+module "etl-coverage-hoststack-rls2406" {
+  providers = {
+    nomad = nomad.yul1
+  }
+  source = "../"
+
+  aws_access_key_id         = data.vault_generic_secret.fdio_logs.data["access_key"]
+  aws_secret_access_key     = data.vault_generic_secret.fdio_logs.data["secret_key"]
+  aws_default_region        = data.vault_generic_secret.fdio_logs.data["region"]
+  out_aws_access_key_id     = data.vault_generic_secret.fdio_docs.data["access_key"]
+  out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
+  out_aws_default_region    = data.vault_generic_secret.fdio_docs.data["region"]
+  cron                      = "0 30 0 * * * *"
+  datacenters               = ["yul1"]
+  job_name                  = "etl-coverage-hoststack-rls2406"
+}
+
+module "etl-coverage-mrr-rls2406" {
+  providers = {
+    nomad = nomad.yul1
+  }
+  source = "../"
+
+  aws_access_key_id         = data.vault_generic_secret.fdio_logs.data["access_key"]
+  aws_secret_access_key     = data.vault_generic_secret.fdio_logs.data["secret_key"]
+  aws_default_region        = data.vault_generic_secret.fdio_logs.data["region"]
+  out_aws_access_key_id     = data.vault_generic_secret.fdio_docs.data["access_key"]
+  out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
+  out_aws_default_region    = data.vault_generic_secret.fdio_docs.data["region"]
+  cron                      = "0 30 0 * * * *"
+  datacenters               = ["yul1"]
+  job_name                  = "etl-coverage-mrr-rls2406"
+}
+
+module "etl-coverage-ndrpdr-rls2406" {
+  providers = {
+    nomad = nomad.yul1
+  }
+  source = "../"
+
+  aws_access_key_id         = data.vault_generic_secret.fdio_logs.data["access_key"]
+  aws_secret_access_key     = data.vault_generic_secret.fdio_logs.data["secret_key"]
+  aws_default_region        = data.vault_generic_secret.fdio_logs.data["region"]
+  out_aws_access_key_id     = data.vault_generic_secret.fdio_docs.data["access_key"]
+  out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
+  out_aws_default_region    = data.vault_generic_secret.fdio_docs.data["region"]
+  cron                      = "0 30 0 * * * *"
+  datacenters               = ["yul1"]
+  job_name                  = "etl-coverage-ndrpdr-rls2406"
+}
+
+module "etl-coverage-reconf-rls2406" {
+  providers = {
+    nomad = nomad.yul1
+  }
+  source = "../"
+
+  aws_access_key_id         = data.vault_generic_secret.fdio_logs.data["access_key"]
+  aws_secret_access_key     = data.vault_generic_secret.fdio_logs.data["secret_key"]
+  aws_default_region        = data.vault_generic_secret.fdio_logs.data["region"]
+  out_aws_access_key_id     = data.vault_generic_secret.fdio_docs.data["access_key"]
+  out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
+  out_aws_default_region    = data.vault_generic_secret.fdio_docs.data["region"]
+  cron                      = "0 30 0 * * * *"
+  datacenters               = ["yul1"]
+  job_name                  = "etl-coverage-reconf-rls2406"
+}
+
+module "etl-coverage-soak-rls2406" {
+  providers = {
+    nomad = nomad.yul1
+  }
+  source = "../"
+
+  aws_access_key_id         = data.vault_generic_secret.fdio_logs.data["access_key"]
+  aws_secret_access_key     = data.vault_generic_secret.fdio_logs.data["secret_key"]
+  aws_default_region        = data.vault_generic_secret.fdio_logs.data["region"]
+  out_aws_access_key_id     = data.vault_generic_secret.fdio_docs.data["access_key"]
+  out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
+  out_aws_default_region    = data.vault_generic_secret.fdio_docs.data["region"]
+  cron                      = "0 30 0 * * * *"
+  datacenters               = ["yul1"]
+  job_name                  = "etl-coverage-soak-rls2406"
+}
+
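The eleven module blocks above are identical apart from job_name, each of which corresponds to one of the .hcl.tftpl templates under conf/nomad/. Purely as a sketch (not part of this change, with illustrative local and module names), the same set of jobs could be declared with a single for_each module block; the trade-off is that the module addresses in Terraform state would change, which is presumably why the explicit per-job blocks are kept:

locals {
  # One entry per rls2406 ETL job enabled in this change.
  etl_rls2406_jobs = [
    "etl-iterative-hoststack-rls2406",
    "etl-iterative-mrr-rls2406",
    "etl-iterative-ndrpdr-rls2406",
    "etl-iterative-reconf-rls2406",
    "etl-iterative-soak-rls2406",
    "etl-coverage-device-rls2406",
    "etl-coverage-hoststack-rls2406",
    "etl-coverage-mrr-rls2406",
    "etl-coverage-ndrpdr-rls2406",
    "etl-coverage-reconf-rls2406",
    "etl-coverage-soak-rls2406",
  ]
}

module "etl_rls2406" {
  for_each = toset(local.etl_rls2406_jobs)
  providers = {
    nomad = nomad.yul1
  }
  source = "../"

  aws_access_key_id         = data.vault_generic_secret.fdio_logs.data["access_key"]
  aws_secret_access_key     = data.vault_generic_secret.fdio_logs.data["secret_key"]
  aws_default_region        = data.vault_generic_secret.fdio_logs.data["region"]
  out_aws_access_key_id     = data.vault_generic_secret.fdio_docs.data["access_key"]
  out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
  out_aws_default_region    = data.vault_generic_secret.fdio_docs.data["region"]
  cron                      = "0 30 0 * * * *"
  datacenters               = ["yul1"]
  job_name                  = each.key
}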