feat(etl): Cleanup and add new release 98/42198/3
author Peter Mikus <[email protected]>
Fri, 17 Jan 2025 13:51:38 +0000 (14:51 +0100)
committer Peter Mikus <[email protected]>
Mon, 20 Jan 2025 08:06:14 +0000 (08:06 +0000)
Signed-off-by: Peter Mikus <[email protected]>
Change-Id: I7c7126c65b4782a6dde2b902e85749ec365cf0b1

33 files changed:
csit.infra.etl/coverage_device.json [deleted file]
csit.infra.etl/coverage_device_rls2410.py [deleted file]
csit.infra.etl/coverage_hoststack.py [moved from csit.infra.etl/coverage_hoststack_rls2410.py with 96% similarity]
csit.infra.etl/coverage_mrr.py [moved from csit.infra.etl/coverage_mrr_rls2410.py with 96% similarity]
csit.infra.etl/coverage_ndrpdr.py [moved from csit.infra.etl/coverage_ndrpdr_rls2410.py with 96% similarity]
csit.infra.etl/coverage_reconf.py [moved from csit.infra.etl/coverage_reconf_rls2410.py with 96% similarity]
csit.infra.etl/coverage_soak.py [moved from csit.infra.etl/coverage_soak_rls2410.py with 96% similarity]
csit.infra.etl/iterative_hoststack.py [moved from csit.infra.etl/iterative_hoststack_rls2410.py with 97% similarity]
csit.infra.etl/iterative_mrr.py [moved from csit.infra.etl/iterative_mrr_rls2410.py with 97% similarity]
csit.infra.etl/iterative_ndrpdr.py [moved from csit.infra.etl/iterative_ndrpdr_rls2410.py with 97% similarity]
csit.infra.etl/iterative_reconf.py [moved from csit.infra.etl/iterative_reconf_rls2410.py with 97% similarity]
csit.infra.etl/iterative_soak.py [moved from csit.infra.etl/iterative_soak_rls2410.py with 97% similarity]
csit.infra.etl/local.py
csit.infra.etl/stats.py
csit.infra.etl/trending_hoststack.py
csit.infra.etl/trending_mrr.py
csit.infra.etl/trending_ndrpdr.py
csit.infra.etl/trending_reconf.py
csit.infra.etl/trending_soak.py
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-device-rls2410.hcl.tftpl [deleted file]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-hoststack.hcl.tftpl [moved from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-hoststack-rls2410.hcl.tftpl with 94% similarity]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-mrr.hcl.tftpl [moved from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-mrr-rls2410.hcl.tftpl with 94% similarity]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-ndrpdr.hcl.tftpl [moved from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-ndrpdr-rls2410.hcl.tftpl with 94% similarity]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-reconf.hcl.tftpl [moved from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-reconf-rls2410.hcl.tftpl with 94% similarity]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-soak.hcl.tftpl [moved from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-soak-rls2410.hcl.tftpl with 94% similarity]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-hoststack.hcl.tftpl [moved from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-hoststack-rls2410.hcl.tftpl with 94% similarity]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-mrr.hcl.tftpl [moved from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-mrr-rls2410.hcl.tftpl with 94% similarity]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-ndrpdr.hcl.tftpl [moved from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-ndrpdr-rls2410.hcl.tftpl with 94% similarity]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-reconf.hcl.tftpl [moved from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-reconf-rls2410.hcl.tftpl with 94% similarity]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-soak.hcl.tftpl [moved from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-soak-rls2410.hcl.tftpl with 94% similarity]
fdio.infra.terraform/terraform-nomad-pyspark-etl/main.tf
fdio.infra.terraform/terraform-nomad-pyspark-etl/variables.tf
fdio.infra.terraform/terraform-nomad-pyspark-etl/versions.tf
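Note on the shared pattern: each renamed ETL script stays release-specific only through two hardcoded strings, visible in the hunks below, the S3 path filter (e.g. "report-coverage-2502") and the parquet dataset path suffix (e.g. "coverage_rls2502"). A minimal, hypothetical Python sketch of that pattern (not part of the commit; function names and defaults are illustrative):

    # Illustrative sketch only: the real scripts hardcode these strings per
    # release rather than deriving them from parameters.
    def s3_path_filter(paths, kind="coverage", release="2502"):
        """Keep only S3 paths that belong to one job kind and release."""
        token = f"report-{kind}-{release}"
        return [path for path in paths if token in path]

    def parquet_dataset(bucket, kind="coverage", release="2502"):
        """Parquet dataset path passed to awswrangler wr.s3.to_parquet()."""
        return f"s3://{bucket}/csit/parquet/{kind}_rls{release}"

    assert parquet_dataset("example-bucket") == \
        "s3://example-bucket/csit/parquet/coverage_rls2502"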

diff --git a/csit.infra.etl/coverage_device.json b/csit.infra.etl/coverage_device.json
deleted file mode 100644 (file)
index 2a4327c..0000000
+++ /dev/null
@@ -1,89 +0,0 @@
-{
-    "fields": [
-        {
-            "metadata": {},
-            "name": "job",
-            "nullable": false,
-            "type": "string"
-        },
-        {
-            "metadata": {},
-            "name": "build",
-            "nullable": false,
-            "type": "integer"
-        },
-        {
-            "metadata": {},
-            "name": "duration",
-            "nullable": true,
-            "type": "double"
-        },
-        {
-            "metadata": {},
-            "name": "dut_type",
-            "nullable": true,
-            "type": "string"
-        },
-        {
-            "metadata": {},
-            "name": "dut_version",
-            "nullable": true,
-            "type": "string"
-        },
-        {
-            "metadata": {},
-            "name": "tg_type",
-            "nullable": true,
-            "type": "string"
-        },
-        {
-            "metadata": {},
-            "name": "tg_version",
-            "nullable": true,
-            "type": "string"
-        },
-        {
-            "metadata": {},
-            "name": "start_time",
-            "nullable": true,
-            "type": "timestamp"
-        },
-        {
-            "metadata": {},
-            "name": "passed",
-            "nullable": true,
-            "type": "boolean"
-        },
-        {
-            "metadata": {},
-            "name": "test_id",
-            "nullable": true,
-            "type": "string"
-        },
-        {
-            "metadata": {},
-            "name": "test_name_long",
-            "nullable": true,
-            "type": "string"
-        },
-        {
-            "metadata": {},
-            "name": "test_name_short",
-            "nullable": true,
-            "type": "string"
-        },
-        {
-            "metadata": {},
-            "name": "test_type",
-            "nullable": true,
-            "type": "string"
-        },
-        {
-            "metadata": {},
-            "name": "version",
-            "nullable": true,
-            "type": "string"
-        }
-    ],
-    "type": "struct"
-}
\ No newline at end of file
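The surviving scripts deserialize sibling schema files of exactly this shape via StructType.fromJson. A minimal sketch, assuming PySpark is available and a schema file like the one above is present locally (the file name here is illustrative):

    # Rebuild a Spark schema from a JSON file of the shape shown above and
    # create an empty DataFrame from it, as the ETL scripts do.
    from json import load

    from pyspark.sql import SparkSession
    from pyspark.sql.types import StructType

    spark = SparkSession.builder.master("local[1]").getOrCreate()
    with open("coverage_hoststack.json", "r", encoding="UTF-8") as f_schema:
        schema = StructType.fromJson(load(f_schema))
    empty_sdf = spark.createDataFrame([], schema)
    empty_sdf.printSchema()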
diff --git a/csit.infra.etl/coverage_device_rls2410.py b/csit.infra.etl/coverage_device_rls2410.py
deleted file mode 100644 (file)
index 48746da..0000000
+++ /dev/null
@@ -1,175 +0,0 @@
-#!/usr/bin/env python3
-
-# Copyright (c) 2024 Cisco and/or its affiliates.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at:
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""ETL script running on top of the s3://"""
-
-from datetime import datetime, timedelta
-from json import load
-from os import environ
-from pytz import utc
-
-import awswrangler as wr
-from awswrangler.exceptions import EmptyDataFrame
-from awsglue.context import GlueContext
-from boto3 import session
-from pyspark.context import SparkContext
-from pyspark.sql.functions import col, lit, regexp_replace
-from pyspark.sql.types import StructType
-
-
-S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index")
-S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index")
-PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-vpp-device-*"
-SUFFIX="info.json.gz"
-IGNORE_SUFFIX=[
-    "suite.info.json.gz",
-    "setup.info.json.gz",
-    "teardown.info.json.gz",
-    "suite.output.info.json.gz",
-    "setup.output.info.json.gz",
-    "teardown.output.info.json.gz"
-]
-LAST_MODIFIED_END=utc.localize(
-    datetime.strptime(
-        f"{datetime.now().year}-{datetime.now().month}-{datetime.now().day}",
-        "%Y-%m-%d"
-    )
-)
-LAST_MODIFIED_BEGIN=LAST_MODIFIED_END - timedelta(1)
-
-
-def flatten_frame(nested_sdf):
-    """Unnest Spark DataFrame in case there nested structered columns.
-
-    :param nested_sdf: Spark DataFrame.
-    :type nested_sdf: DataFrame
-    :returns: Unnested DataFrame.
-    :rtype: DataFrame
-    """
-    stack = [((), nested_sdf)]
-    columns = []
-    while len(stack) > 0:
-        parents, sdf = stack.pop()
-        for column_name, column_type in sdf.dtypes:
-            if column_type[:6] == "struct":
-                projected_sdf = sdf.select(column_name + ".*")
-                stack.append((parents + (column_name,), projected_sdf))
-            else:
-                columns.append(
-                    col(".".join(parents + (column_name,))) \
-                        .alias("_".join(parents + (column_name,)))
-                )
-    return nested_sdf.select(columns)
-
-
-def process_json_to_dataframe(schema_name, paths):
-    """Processes JSON to Spark DataFrame.
-
-    :param schema_name: Schema name.
-    :type schema_name: string
-    :param paths: S3 paths to process.
-    :type paths: list
-    :returns: Spark DataFrame.
-    :rtype: DataFrame
-    """
-    drop_subset = [
-        "dut_type", "dut_version",
-        "passed",
-        "test_name_long", "test_name_short",
-        "test_type",
-        "version"
-    ]
-
-    # load schemas
-    with open(f"coverage_{schema_name}.json", "r", encoding="UTF-8") as f_schema:
-        schema = StructType.fromJson(load(f_schema))
-
-    # create empty DF out of schemas
-    sdf = spark.createDataFrame([], schema)
-
-    # filter list
-    filtered = [path for path in paths if schema_name in path]
-
-    # select
-    for path in filtered:
-        print(path)
-
-        sdf_loaded = spark \
-            .read \
-            .option("multiline", "true") \
-            .schema(schema) \
-            .json(path) \
-            .withColumn("job", lit(path.split("/")[4])) \
-            .withColumn("build", lit(path.split("/")[5]))
-        sdf = sdf.unionByName(sdf_loaded, allowMissingColumns=True)
-
-    # drop rows with all nulls and drop rows with null in critical frames
-    sdf = sdf.na.drop(how="all")
-    sdf = sdf.na.drop(how="any", thresh=None, subset=drop_subset)
-
-    # flatten frame
-    sdf = flatten_frame(sdf)
-
-    return sdf
-
-
-# create SparkContext and GlueContext
-spark_context = SparkContext.getOrCreate()
-spark_context.setLogLevel("WARN")
-glue_context = GlueContext(spark_context)
-spark = glue_context.spark_session
-
-# files of interest
-paths = wr.s3.list_objects(
-    path=PATH,
-    suffix=SUFFIX,
-    last_modified_begin=LAST_MODIFIED_BEGIN,
-    last_modified_end=LAST_MODIFIED_END,
-    ignore_suffix=IGNORE_SUFFIX,
-    ignore_empty=True
-)
-
-filtered_paths = [path for path in paths if "report-coverage-2410" in path]
-
-out_sdf = process_json_to_dataframe("device", filtered_paths)
-out_sdf.printSchema()
-out_sdf = out_sdf \
-    .withColumn("year", lit(datetime.now().year)) \
-    .withColumn("month", lit(datetime.now().month)) \
-    .withColumn("day", lit(datetime.now().day)) \
-    .repartition(1)
-
-try:
-    boto3_session = session.Session(
-        aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
-        aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
-        region_name=environ["OUT_AWS_DEFAULT_REGION"]
-    )
-except KeyError:
-    boto3_session = session.Session()
-
-try:
-    wr.s3.to_parquet(
-        df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2410",
-        dataset=True,
-        partition_cols=["test_type", "year", "month", "day"],
-        compression="snappy",
-        use_threads=True,
-        mode="overwrite_partitions",
-        boto3_session=boto3_session
-    )
-except EmptyDataFrame:
-    pass
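For reference, the flatten_frame helper deleted above survives unchanged in the renamed scripts. A small standalone sketch of the same unnesting idea, assuming a local SparkSession (the data and column names are illustrative):

    # Hypothetical demonstration: one level of the struct flattening that
    # flatten_frame performs; struct fields become parent_field columns.
    from pyspark.sql import SparkSession
    from pyspark.sql.functions import col

    spark = SparkSession.builder.master("local[1]").getOrCreate()
    sdf = spark.createDataFrame(
        [(("vpp", True), "job-1")],
        "info struct<dut_type:string,passed:boolean>, job string",
    )
    flat = sdf.select(
        col("info.dut_type").alias("info_dut_type"),
        col("info.passed").alias("info_passed"),
        col("job"),
    )
    flat.show()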
diff --git a/csit.infra.etl/coverage_hoststack_rls2410.py b/csit.infra.etl/coverage_hoststack.py
similarity index 96%
rename from csit.infra.etl/coverage_hoststack_rls2410.py
rename to csit.infra.etl/coverage_hoststack.py
index 080e998..8b35279 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2024 Cisco and/or its affiliates.
+# Copyright (c) 2025 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-coverage-2410" in path]
+filtered_paths = [path for path in paths if "report-coverage-2502" in path]
 
 out_sdf = process_json_to_dataframe("hoststack", filtered_paths)
 out_sdf.printSchema()
@@ -163,7 +163,7 @@ except KeyError:
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2410",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2502",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
diff --git a/csit.infra.etl/coverage_mrr_rls2410.py b/csit.infra.etl/coverage_mrr.py
similarity index 96%
rename from csit.infra.etl/coverage_mrr_rls2410.py
rename to csit.infra.etl/coverage_mrr.py
index 9400d8d..79bb767 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2024 Cisco and/or its affiliates.
+# Copyright (c) 2025 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-coverage-2410" in path]
+filtered_paths = [path for path in paths if "report-coverage-2502" in path]
 
 out_sdf = process_json_to_dataframe("mrr", filtered_paths)
 out_sdf.printSchema()
@@ -163,7 +163,7 @@ except KeyError:
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2410",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2502",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
diff --git a/csit.infra.etl/coverage_ndrpdr_rls2410.py b/csit.infra.etl/coverage_ndrpdr.py
similarity index 96%
rename from csit.infra.etl/coverage_ndrpdr_rls2410.py
rename to csit.infra.etl/coverage_ndrpdr.py
index 18b7627..6ac0863 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2024 Cisco and/or its affiliates.
+# Copyright (c) 2025 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-coverage-2410" in path]
+filtered_paths = [path for path in paths if "report-coverage-2502" in path]
 
 out_sdf = process_json_to_dataframe("ndrpdr", filtered_paths)
 out_sdf.printSchema()
@@ -163,7 +163,7 @@ except KeyError:
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2410",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2502",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
diff --git a/csit.infra.etl/coverage_reconf_rls2410.py b/csit.infra.etl/coverage_reconf.py
similarity index 96%
rename from csit.infra.etl/coverage_reconf_rls2410.py
rename to csit.infra.etl/coverage_reconf.py
index e60a3d6..560225f 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2024 Cisco and/or its affiliates.
+# Copyright (c) 2025 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-coverage-2410" in path]
+filtered_paths = [path for path in paths if "report-coverage-2502" in path]
 
 out_sdf = process_json_to_dataframe("reconf", filtered_paths)
 out_sdf.printSchema()
@@ -163,7 +163,7 @@ except KeyError:
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2410",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2502",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
diff --git a/csit.infra.etl/coverage_soak_rls2410.py b/csit.infra.etl/coverage_soak.py
similarity index 96%
rename from csit.infra.etl/coverage_soak_rls2410.py
rename to csit.infra.etl/coverage_soak.py
index 5dc9fb7..13d143f 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2024 Cisco and/or its affiliates.
+# Copyright (c) 2025 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-coverage-2410" in path]
+filtered_paths = [path for path in paths if "report-coverage-2502" in path]
 
 out_sdf = process_json_to_dataframe("soak", filtered_paths)
 out_sdf.printSchema()
@@ -163,7 +163,7 @@ except KeyError:
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2410",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2502",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
diff --git a/csit.infra.etl/iterative_hoststack_rls2410.py b/csit.infra.etl/iterative_hoststack.py
similarity index 97%
rename from csit.infra.etl/iterative_hoststack_rls2410.py
rename to csit.infra.etl/iterative_hoststack.py
index 6d9e3d6..288d5d7 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2024 Cisco and/or its affiliates.
+# Copyright (c) 2025 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-iterative-2410" in path]
+filtered_paths = [path for path in paths if "report-iterative-2502" in path]
 
 out_sdf = process_json_to_dataframe("hoststack", filtered_paths)
 out_sdf.printSchema()
@@ -163,7 +163,7 @@ except KeyError:
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2410",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2502",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
diff --git a/csit.infra.etl/iterative_mrr_rls2410.py b/csit.infra.etl/iterative_mrr.py
similarity index 97%
rename from csit.infra.etl/iterative_mrr_rls2410.py
rename to csit.infra.etl/iterative_mrr.py
index d74e6a9..636c7ed 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2024 Cisco and/or its affiliates.
+# Copyright (c) 2025 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-iterative-2410" in path]
+filtered_paths = [path for path in paths if "report-iterative-2502" in path]
 
 out_sdf = process_json_to_dataframe("mrr", filtered_paths)
 out_sdf.printSchema()
@@ -163,7 +163,7 @@ except KeyError:
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2410",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2502",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
diff --git a/csit.infra.etl/iterative_ndrpdr_rls2410.py b/csit.infra.etl/iterative_ndrpdr.py
similarity index 97%
rename from csit.infra.etl/iterative_ndrpdr_rls2410.py
rename to csit.infra.etl/iterative_ndrpdr.py
index 0530dc8..ce29170 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2024 Cisco and/or its affiliates.
+# Copyright (c) 2025 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-iterative-2410" in path]
+filtered_paths = [path for path in paths if "report-iterative-2502" in path]
 
 out_sdf = process_json_to_dataframe("ndrpdr", filtered_paths)
 out_sdf.printSchema()
@@ -163,7 +163,7 @@ except KeyError:
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2410",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2502",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
diff --git a/csit.infra.etl/iterative_reconf_rls2410.py b/csit.infra.etl/iterative_reconf.py
similarity index 97%
rename from csit.infra.etl/iterative_reconf_rls2410.py
rename to csit.infra.etl/iterative_reconf.py
index 41d2aed..1381a76 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2024 Cisco and/or its affiliates.
+# Copyright (c) 2025 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-iterative-2410" in path]
+filtered_paths = [path for path in paths if "report-iterative-2502" in path]
 
 out_sdf = process_json_to_dataframe("reconf", filtered_paths)
 out_sdf.show(truncate=False)
@@ -164,7 +164,7 @@ except KeyError:
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2410",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2502",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
diff --git a/csit.infra.etl/iterative_soak_rls2410.py b/csit.infra.etl/iterative_soak.py
similarity index 97%
rename from csit.infra.etl/iterative_soak_rls2410.py
rename to csit.infra.etl/iterative_soak.py
index f581c7c..3418ba2 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2024 Cisco and/or its affiliates.
+# Copyright (c) 2025 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-iterative-2410" in path]
+filtered_paths = [path for path in paths if "report-iterative-2502" in path]
 
 out_sdf = process_json_to_dataframe("soak", filtered_paths)
 out_sdf.printSchema()
@@ -163,7 +163,7 @@ except KeyError:
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2410",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2502",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
diff --git a/csit.infra.etl/local.py b/csit.infra.etl/local.py
index e942ceb..41a7c0a 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2023 Cisco and/or its affiliates.
+# Copyright (c) 2025 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
diff --git a/csit.infra.etl/stats.py b/csit.infra.etl/stats.py
index d7b4c23..e2c001e 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2024 Cisco and/or its affiliates.
+# Copyright (c) 2025 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
diff --git a/csit.infra.etl/trending_hoststack.py b/csit.infra.etl/trending_hoststack.py
index 2bbd655..f47cc8a 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2024 Cisco and/or its affiliates.
+# Copyright (c) 2025 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
diff --git a/csit.infra.etl/trending_mrr.py b/csit.infra.etl/trending_mrr.py
index 84b86c7..73bfebc 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2024 Cisco and/or its affiliates.
+# Copyright (c) 2025 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
diff --git a/csit.infra.etl/trending_ndrpdr.py b/csit.infra.etl/trending_ndrpdr.py
index f139f09..cf3b19f 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2024 Cisco and/or its affiliates.
+# Copyright (c) 2025 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
diff --git a/csit.infra.etl/trending_reconf.py b/csit.infra.etl/trending_reconf.py
index 43a18b4..2f20b7c 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2024 Cisco and/or its affiliates.
+# Copyright (c) 2025 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
diff --git a/csit.infra.etl/trending_soak.py b/csit.infra.etl/trending_soak.py
index 086a194..b7b59f3 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2024 Cisco and/or its affiliates.
+# Copyright (c) 2025 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-device-rls2410.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-device-rls2410.hcl.tftpl
deleted file mode 100644 (file)
index 96fef71..0000000
+++ /dev/null
@@ -1,59 +0,0 @@
-job "${job_name}" {
-  datacenters = ["${datacenters}"]
-  type        = "${type}"
-  periodic {
-    cron             = "${cron}"
-    prohibit_overlap = "${prohibit_overlap}"
-    time_zone        = "${time_zone}"
-  }
-  group "${job_name}" {
-    restart {
-      mode = "fail"
-    }
-    constraint {
-      attribute = "$${attr.cpu.arch}"
-      operator  = "!="
-      value     = "arm64"
-    }
-    constraint {
-      attribute = "$${node.class}"
-      value     = "builder"
-    }
-    task "${job_name}" {
-      artifact {
-        source      = "https://raw.githubusercontent.com/FDio/csit/master/csit.infra.etl/coverage_device_rls2410.py"
-        destination = "local/"
-      }
-      artifact {
-        source      = "https://raw.githubusercontent.com/FDio/csit/master/csit.infra.etl/coverage_device.json"
-        destination = "local/"
-      }
-      driver = "docker"
-      config {
-        image   = "${image}"
-        command = "gluesparksubmit"
-        args = [
-          "--driver-memory", "20g",
-          "--executor-memory", "20g",
-          "--executor-cores", "2",
-          "--master", "local[2]",
-          "coverage_device_rls2410.py"
-        ]
-        work_dir = "/local"
-      }
-      env {
-        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
-        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
-        AWS_DEFAULT_REGION        = "${aws_default_region}"
-        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
-        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
-        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
-        ${ envs }
-      }
-      resources {
-        cpu    = ${cpu}
-        memory = ${memory}
-      }
-    }
-  }
-}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-hoststack-rls2410.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-hoststack.hcl.tftpl
similarity index 94%
rename from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-hoststack-rls2410.hcl.tftpl
rename to fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-hoststack.hcl.tftpl
@@ -21,7 +21,7 @@ job "${job_name}" {
     }
     task "${job_name}" {
       artifact {
-        source      = "https://raw.githubusercontent.com/FDio/csit/master/csit.infra.etl/coverage_hoststack_rls2410.py"
+        source      = "https://raw.githubusercontent.com/FDio/csit/master/csit.infra.etl/coverage_hoststack.py"
         destination = "local/"
       }
       artifact {
@@ -37,7 +37,7 @@ job "${job_name}" {
           "--executor-memory", "20g",
           "--executor-cores", "2",
           "--master", "local[2]",
-          "coverage_hoststack_rls2410.py"
+          "coverage_hoststack.py"
         ]
         work_dir = "/local"
       }
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-mrr-rls2410.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-mrr.hcl.tftpl
similarity index 94%
rename from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-mrr-rls2410.hcl.tftpl
rename to fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-mrr.hcl.tftpl
@@ -21,7 +21,7 @@ job "${job_name}" {
     }
     task "${job_name}" {
       artifact {
-        source      = "https://raw.githubusercontent.com/FDio/csit/master/csit.infra.etl/coverage_mrr_rls2410.py"
+        source      = "https://raw.githubusercontent.com/FDio/csit/master/csit.infra.etl/coverage_mrr.py"
         destination = "local/"
       }
       artifact {
@@ -37,7 +37,7 @@ job "${job_name}" {
           "--executor-memory", "20g",
           "--executor-cores", "2",
           "--master", "local[2]",
-          "coverage_mrr_rls2410.py"
+          "coverage_mrr.py"
         ]
         work_dir = "/local"
       }
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-ndrpdr-rls2410.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-ndrpdr.hcl.tftpl
similarity index 94%
rename from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-ndrpdr-rls2410.hcl.tftpl
rename to fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-ndrpdr.hcl.tftpl
@@ -21,7 +21,7 @@ job "${job_name}" {
     }
     task "${job_name}" {
       artifact {
-        source      = "https://raw.githubusercontent.com/FDio/csit/master/csit.infra.etl/coverage_ndrpdr_rls2410.py"
+        source      = "https://raw.githubusercontent.com/FDio/csit/master/csit.infra.etl/coverage_ndrpdr.py"
         destination = "local/"
       }
       artifact {
@@ -37,7 +37,7 @@ job "${job_name}" {
           "--executor-memory", "20g",
           "--executor-cores", "2",
           "--master", "local[2]",
-          "coverage_ndrpdr_rls2410.py"
+          "coverage_ndrpdr.py"
         ]
         work_dir = "/local"
       }
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-reconf-rls2410.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-reconf.hcl.tftpl
similarity index 94%
rename from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-reconf-rls2410.hcl.tftpl
rename to fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-reconf.hcl.tftpl
@@ -21,7 +21,7 @@ job "${job_name}" {
     }
     task "${job_name}" {
       artifact {
-        source      = "https://raw.githubusercontent.com/FDio/csit/master/csit.infra.etl/coverage_reconf_rls2410.py"
+        source      = "https://raw.githubusercontent.com/FDio/csit/master/csit.infra.etl/coverage_reconf.py"
         destination = "local/"
       }
       artifact {
@@ -37,7 +37,7 @@ job "${job_name}" {
           "--executor-memory", "20g",
           "--executor-cores", "2",
           "--master", "local[2]",
-          "coverage_reconf_rls2410.py"
+          "coverage_reconf.py"
         ]
         work_dir = "/local"
       }
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-soak-rls2410.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-soak.hcl.tftpl
similarity index 94%
rename from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-soak-rls2410.hcl.tftpl
rename to fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-soak.hcl.tftpl
@@ -21,7 +21,7 @@ job "${job_name}" {
     }
     task "${job_name}" {
       artifact {
-        source      = "https://raw.githubusercontent.com/FDio/csit/master/csit.infra.etl/coverage_soak_rls2410.py"
+        source      = "https://raw.githubusercontent.com/FDio/csit/master/csit.infra.etl/coverage_soak.py"
         destination = "local/"
       }
       artifact {
@@ -37,7 +37,7 @@ job "${job_name}" {
           "--executor-memory", "20g",
           "--executor-cores", "2",
           "--master", "local[2]",
-          "coverage_soak_rls2410.py"
+          "coverage_soak.py"
         ]
         work_dir = "/local"
       }
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-hoststack-rls2410.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-hoststack.hcl.tftpl
similarity index 94%
rename from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-hoststack-rls2410.hcl.tftpl
rename to fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-hoststack.hcl.tftpl
@@ -21,7 +21,7 @@ job "${job_name}" {
     }
     task "${job_name}" {
       artifact {
-        source      = "https://raw.githubusercontent.com/FDio/csit/master/csit.infra.etl/iterative_hoststack_rls2410.py"
+        source      = "https://raw.githubusercontent.com/FDio/csit/master/csit.infra.etl/iterative_hoststack.py"
         destination = "local/"
       }
       artifact {
@@ -37,7 +37,7 @@ job "${job_name}" {
           "--executor-memory", "20g",
           "--executor-cores", "2",
           "--master", "local[2]",
-          "iterative_hoststack_rls2410.py"
+          "iterative_hoststack.py"
         ]
         work_dir = "/local"
       }
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-mrr-rls2410.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-mrr.hcl.tftpl
similarity index 94%
rename from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-mrr-rls2410.hcl.tftpl
rename to fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-mrr.hcl.tftpl
@@ -21,7 +21,7 @@ job "${job_name}" {
     }
     task "${job_name}" {
       artifact {
-        source      = "https://raw.githubusercontent.com/FDio/csit/master/csit.infra.etl/iterative_mrr_rls2410.py"
+        source      = "https://raw.githubusercontent.com/FDio/csit/master/csit.infra.etl/iterative_mrr.py"
         destination = "local/"
       }
       artifact {
@@ -37,7 +37,7 @@ job "${job_name}" {
           "--executor-memory", "20g",
           "--executor-cores", "2",
           "--master", "local[2]",
-          "iterative_mrr_rls2410.py"
+          "iterative_mrr.py"
         ]
         work_dir = "/local"
       }
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-ndrpdr-rls2410.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-ndrpdr.hcl.tftpl
similarity index 94%
rename from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-ndrpdr-rls2410.hcl.tftpl
rename to fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-ndrpdr.hcl.tftpl
@@ -21,7 +21,7 @@ job "${job_name}" {
     }
     task "${job_name}" {
       artifact {
-        source      = "https://raw.githubusercontent.com/FDio/csit/master/csit.infra.etl/iterative_ndrpdr_rls2410.py"
+        source      = "https://raw.githubusercontent.com/FDio/csit/master/csit.infra.etl/iterative_ndrpdr.py"
         destination = "local/"
       }
       artifact {
@@ -37,7 +37,7 @@ job "${job_name}" {
           "--executor-memory", "20g",
           "--executor-cores", "2",
           "--master", "local[2]",
-          "iterative_ndrpdr_rls2410.py"
+          "iterative_ndrpdr.py"
         ]
         work_dir = "/local"
       }
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-reconf-rls2410.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-reconf.hcl.tftpl
similarity index 94%
rename from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-reconf-rls2410.hcl.tftpl
rename to fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-reconf.hcl.tftpl
@@ -21,7 +21,7 @@ job "${job_name}" {
     }
     task "${job_name}" {
       artifact {
-        source      = "https://raw.githubusercontent.com/FDio/csit/master/csit.infra.etl/iterative_reconf_rls2410.py"
+        source      = "https://raw.githubusercontent.com/FDio/csit/master/csit.infra.etl/iterative_reconf.py"
         destination = "local/"
       }
       artifact {
@@ -37,7 +37,7 @@ job "${job_name}" {
           "--executor-memory", "20g",
           "--executor-cores", "2",
           "--master", "local[2]",
-          "iterative_reconf_rls2410.py"
+          "iterative_reconf.py"
         ]
         work_dir = "/local"
       }
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-soak-rls2410.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-soak.hcl.tftpl
similarity index 94%
rename from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-soak-rls2410.hcl.tftpl
rename to fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-soak.hcl.tftpl
@@ -21,7 +21,7 @@ job "${job_name}" {
     }
     task "${job_name}" {
       artifact {
-        source      = "https://raw.githubusercontent.com/FDio/csit/master/csit.infra.etl/iterative_soak_rls2410.py"
+        source      = "https://raw.githubusercontent.com/FDio/csit/master/csit.infra.etl/iterative_soak.py"
         destination = "local/"
       }
       artifact {
@@ -37,7 +37,7 @@ job "${job_name}" {
           "--executor-memory", "20g",
           "--executor-cores", "2",
           "--master", "local[2]",
-          "iterative_soak_rls2410.py"
+          "iterative_soak.py"
         ]
         work_dir = "/local"
       }
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/main.tf b/fdio.infra.terraform/terraform-nomad-pyspark-etl/main.tf
index 57baa24..da3a47d 100644 (file)
@@ -8,6 +8,11 @@ data "vault_kv_secret_v2" "fdio_docs" {
   name  = "etl/fdio_docs"
 }
 
+data "vault_kv_secret_v2" "csit_docs" {
+  mount = "kv"
+  name  = "etl/csit_docs"
+}
+
 module "etl" {
   for_each = { for job in var.nomad_jobs : job.job_name => job }
   providers = {
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/variables.tf b/fdio.infra.terraform/terraform-nomad-pyspark-etl/variables.tf
index e8ddeb0..d0d9fa1 100644 (file)
@@ -55,51 +55,47 @@ variable "nomad_jobs" {
       memory = 50000
     },
     {
-      job_name = "etl-trending-hoststack"
+      job_name = "etl-iterative-hoststack"
       memory = 50000
     },
     {
-      job_name = "etl-iterative-hoststack-rls2410"
+      job_name = "etl-iterative-mrr"
       memory = 50000
     },
     {
-      job_name = "etl-iterative-mrr-rls2410"
+      job_name = "etl-iterative-ndrpdr"
       memory = 50000
     },
     {
-      job_name = "etl-iterative-ndrpdr-rls2410"
+      job_name = "etl-iterative-reconf"
       memory = 50000
     },
     {
-      job_name = "etl-iterative-reconf-rls2410"
+      job_name = "etl-iterative-soak"
       memory = 50000
     },
     {
-      job_name = "etl-iterative-soak-rls2410"
+      job_name = "etl-coverage-hoststack"
       memory = 50000
     },
     {
-      job_name = "etl-coverage-device-rls2410"
+      job_name = "etl-coverage-mrr"
       memory = 50000
     },
     {
-      job_name = "etl-coverage-hoststack-rls2410"
+      job_name = "etl-coverage-ndrpdr"
       memory = 50000
     },
     {
-      job_name = "etl-coverage-mrr-rls2410"
+      job_name = "etl-coverage-reconf"
       memory = 50000
     },
     {
-      job_name = "etl-coverage-ndrpdr-rls2410"
+      job_name = "etl-coverage-soak"
       memory = 50000
     },
     {
-      job_name = "etl-coverage-reconf-rls2410"
-      memory = 50000
-    },
-    {
-      job_name = "etl-coverage-soak-rls2410"
+      job_name = "etl-trending-hoststack"
       memory = 50000
     },
     {
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/versions.tf b/fdio.infra.terraform/terraform-nomad-pyspark-etl/versions.tf
index ffe25bb..cf67cc2 100644 (file)
@@ -10,8 +10,8 @@ terraform {
       version = ">= 2.3.0"
     }
     vault = {
-      version = ">= 4.3.0"
+      version = ">= 4.6.0"
     }
   }
-  required_version = ">= 1.5.4"
+  required_version = ">= 1.10.4"
 }