feat(terraform): Refactor ETL 04/41604/1 oper-240930
author	Peter Mikus <[email protected]>
Thu, 26 Sep 2024 07:51:40 +0000 (09:51 +0200)
committer	Peter Mikus <[email protected]>
Thu, 26 Sep 2024 07:51:40 +0000 (09:51 +0200)
Signed-off-by: Peter Mikus <[email protected]>
Change-Id: I86711ef80304d72a701ef84737f503ee52659dc5

37 files changed:
csit.infra.etl/coverage_device_rls2410.py [moved from csit.infra.etl/coverage_device_rls2406.py with 98% similarity]
csit.infra.etl/coverage_hoststack_rls2410.py [moved from csit.infra.etl/coverage_hoststack_rls2406.py with 98% similarity]
csit.infra.etl/coverage_mrr_rls2410.py [moved from csit.infra.etl/coverage_mrr_rls2406.py with 98% similarity]
csit.infra.etl/coverage_ndrpdr_rls2410.py [moved from csit.infra.etl/coverage_ndrpdr_rls2406.py with 98% similarity]
csit.infra.etl/coverage_reconf_rls2410.py [moved from csit.infra.etl/coverage_reconf_rls2406.py with 98% similarity]
csit.infra.etl/coverage_soak_rls2410.py [moved from csit.infra.etl/coverage_soak_rls2406.py with 98% similarity]
csit.infra.etl/iterative_hoststack_rls2410.py [moved from csit.infra.etl/iterative_hoststack_rls2406.py with 99% similarity]
csit.infra.etl/iterative_mrr_rls2410.py [moved from csit.infra.etl/iterative_mrr_rls2406.py with 99% similarity]
csit.infra.etl/iterative_ndrpdr_rls2410.py [moved from csit.infra.etl/iterative_ndrpdr_rls2406.py with 99% similarity]
csit.infra.etl/iterative_reconf_rls2410.py [moved from csit.infra.etl/iterative_reconf_rls2406.py with 99% similarity]
csit.infra.etl/iterative_soak_rls2410.py [moved from csit.infra.etl/iterative_soak_rls2406.py with 99% similarity]
fdio.infra.terraform/terraform-nomad-nomad-job/main.tf [new file with mode: 0644]
fdio.infra.terraform/terraform-nomad-nomad-job/variables.tf [new file with mode: 0644]
fdio.infra.terraform/terraform-nomad-nomad-job/versions.tf [new file with mode: 0644]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-device-rls2410.hcl.tftpl [moved from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-ndrpdr-rls2406.hcl.tftpl with 95% similarity]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-hoststack-rls2410.hcl.tftpl [new file with mode: 0644]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-mrr-rls2410.hcl.tftpl [moved from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-soak-rls2406.hcl.tftpl with 95% similarity]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-ndrpdr-rls2410.hcl.tftpl [new file with mode: 0644]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-reconf-rls2410.hcl.tftpl [new file with mode: 0644]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-soak-rls2410.hcl.tftpl [moved from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-device-rls2406.hcl.tftpl with 95% similarity]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-hoststack-rls2406.hcl.tftpl [deleted file]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-hoststack-rls2410.hcl.tftpl [new file with mode: 0644]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-mrr-rls2406.hcl.tftpl [deleted file]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-mrr-rls2410.hcl.tftpl [moved from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-reconf-rls2406.hcl.tftpl with 95% similarity]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-ndrpdr-rls2406.hcl.tftpl [deleted file]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-ndrpdr-rls2410.hcl.tftpl [moved from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-hoststack-rls2406.hcl.tftpl with 94% similarity]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-reconf-rls2406.hcl.tftpl [deleted file]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-reconf-rls2410.hcl.tftpl [new file with mode: 0644]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-soak-rls2406.hcl.tftpl [deleted file]
fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-soak-rls2410.hcl.tftpl [moved from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-mrr-rls2406.hcl.tftpl with 95% similarity]
fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/main.tf [deleted file]
fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/variables.tf [deleted file]
fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/versions.tf [deleted file]
fdio.infra.terraform/terraform-nomad-pyspark-etl/main.tf
fdio.infra.terraform/terraform-nomad-pyspark-etl/providers.tf [moved from fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/providers.tf with 100% similarity]
fdio.infra.terraform/terraform-nomad-pyspark-etl/variables.tf
fdio.infra.terraform/terraform-nomad-pyspark-etl/versions.tf

similarity index 98%
rename from csit.infra.etl/coverage_device_rls2406.py
rename to csit.infra.etl/coverage_device_rls2410.py
index 30a4824..48746da 100644 (file)
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-coverage-2406" in path]
+filtered_paths = [path for path in paths if "report-coverage-2410" in path]
 
 out_sdf = process_json_to_dataframe("device", filtered_paths)
 out_sdf.printSchema()
@@ -163,7 +163,7 @@ except KeyError:
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2406",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2410",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
similarity index 98%
rename from csit.infra.etl/coverage_hoststack_rls2406.py
rename to csit.infra.etl/coverage_hoststack_rls2410.py
index 8a0a79b..080e998 100644 (file)
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-coverage-2406" in path]
+filtered_paths = [path for path in paths if "report-coverage-2410" in path]
 
 out_sdf = process_json_to_dataframe("hoststack", filtered_paths)
 out_sdf.printSchema()
@@ -163,7 +163,7 @@ except KeyError:
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2406",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2410",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
similarity index 98%
rename from csit.infra.etl/coverage_mrr_rls2406.py
rename to csit.infra.etl/coverage_mrr_rls2410.py
index 365159d..9400d8d 100644 (file)
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-coverage-2406" in path]
+filtered_paths = [path for path in paths if "report-coverage-2410" in path]
 
 out_sdf = process_json_to_dataframe("mrr", filtered_paths)
 out_sdf.printSchema()
@@ -163,7 +163,7 @@ except KeyError:
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2406",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2410",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
similarity index 98%
rename from csit.infra.etl/coverage_ndrpdr_rls2406.py
rename to csit.infra.etl/coverage_ndrpdr_rls2410.py
index 8ad453e..18b7627 100644 (file)
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-coverage-2406" in path]
+filtered_paths = [path for path in paths if "report-coverage-2410" in path]
 
 out_sdf = process_json_to_dataframe("ndrpdr", filtered_paths)
 out_sdf.printSchema()
@@ -163,7 +163,7 @@ except KeyError:
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2406",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2410",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
similarity index 98%
rename from csit.infra.etl/coverage_reconf_rls2406.py
rename to csit.infra.etl/coverage_reconf_rls2410.py
index 8685809..4e596c2 100644 (file)
@@ -140,7 +140,7 @@ paths = wr.s3.list_objects(
     ignore_suffix=IGNORE_SUFFIX,
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-coverage-2406" in path]
+filtered_paths = [path for path in paths if "report-coverage-2410" in path]
 
 out_sdf = process_json_to_dataframe("reconf", filtered_paths)
 out_sdf.printSchema()
@@ -163,7 +163,7 @@ except KeyError:
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2406",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2410",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
similarity index 98%
rename from csit.infra.etl/coverage_soak_rls2406.py
rename to csit.infra.etl/coverage_soak_rls2410.py
index 6f06f2c..5dc9fb7 100644 (file)
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-coverage-2406" in path]
+filtered_paths = [path for path in paths if "report-coverage-2410" in path]
 
 out_sdf = process_json_to_dataframe("soak", filtered_paths)
 out_sdf.printSchema()
@@ -163,7 +163,7 @@ except KeyError:
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2406",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2410",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
similarity index 99%
rename from csit.infra.etl/iterative_hoststack_rls2406.py
rename to csit.infra.etl/iterative_hoststack_rls2410.py
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-iterative-2406" in path]
+filtered_paths = [path for path in paths if "report-iterative-2410" in path]
 
 out_sdf = process_json_to_dataframe("hoststack", filtered_paths)
 out_sdf.printSchema()
@@ -163,7 +163,7 @@ except KeyError:
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2406",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2410",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
similarity index 99%
rename from csit.infra.etl/iterative_mrr_rls2406.py
rename to csit.infra.etl/iterative_mrr_rls2410.py
index a1a8d96..d74e6a9 100644 (file)
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-iterative-2406" in path]
+filtered_paths = [path for path in paths if "report-iterative-2410" in path]
 
 out_sdf = process_json_to_dataframe("mrr", filtered_paths)
 out_sdf.printSchema()
@@ -163,7 +163,7 @@ except KeyError:
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2406",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2410",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
similarity index 99%
rename from csit.infra.etl/iterative_ndrpdr_rls2406.py
rename to csit.infra.etl/iterative_ndrpdr_rls2410.py
index bb474f1..0530dc8 100644 (file)
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-iterative-2406" in path]
+filtered_paths = [path for path in paths if "report-iterative-2410" in path]
 
 out_sdf = process_json_to_dataframe("ndrpdr", filtered_paths)
 out_sdf.printSchema()
@@ -163,7 +163,7 @@ except KeyError:
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2406",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2410",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
similarity index 99%
rename from csit.infra.etl/iterative_reconf_rls2406.py
rename to csit.infra.etl/iterative_reconf_rls2410.py
index 71e92b4..41d2aed 100644 (file)
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-iterative-2406" in path]
+filtered_paths = [path for path in paths if "report-iterative-2410" in path]
 
 out_sdf = process_json_to_dataframe("reconf", filtered_paths)
 out_sdf.show(truncate=False)
@@ -164,7 +164,7 @@ except KeyError:
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2406",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2410",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
similarity index 99%
rename from csit.infra.etl/iterative_soak_rls2406.py
rename to csit.infra.etl/iterative_soak_rls2410.py
index 9664515..f581c7c 100644 (file)
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-iterative-2406" in path]
+filtered_paths = [path for path in paths if "report-iterative-2410" in path]
 
 out_sdf = process_json_to_dataframe("soak", filtered_paths)
 out_sdf.printSchema()
@@ -163,7 +163,7 @@ except KeyError:
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2406",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2410",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
diff --git a/fdio.infra.terraform/terraform-nomad-nomad-job/main.tf b/fdio.infra.terraform/terraform-nomad-nomad-job/main.tf
new file mode 100644 (file)
index 0000000..fc604fe
--- /dev/null
@@ -0,0 +1,33 @@
+locals {
+  datacenters = join(",", var.datacenters)
+  envs        = join("\n", concat([], var.envs))
+}
+
+resource "nomad_job" "nomad_job" {
+  jobspec = templatefile(
+    "${path.cwd}/conf/nomad/${var.job_name}.hcl.tftpl",
+    {
+      aws_access_key_id         = var.aws_access_key_id,
+      aws_secret_access_key     = var.aws_secret_access_key,
+      aws_default_region        = var.aws_default_region
+      cpu                       = var.cpu,
+      cron                      = var.cron,
+      datacenters               = local.datacenters,
+      envs                      = local.envs,
+      image                     = var.image,
+      job_name                  = var.job_name,
+      memory                    = var.memory,
+      out_aws_access_key_id     = var.out_aws_access_key_id,
+      out_aws_secret_access_key = var.out_aws_secret_access_key,
+      out_aws_default_region    = var.out_aws_default_region
+      prohibit_overlap          = var.prohibit_overlap,
+      time_zone                 = var.time_zone,
+      type                      = var.type,
+      use_vault_provider        = var.vault_secret.use_vault_provider,
+      vault_kv_policy_name      = var.vault_secret.vault_kv_policy_name,
+      vault_kv_path             = var.vault_secret.vault_kv_path,
+      vault_kv_field_access_key = var.vault_secret.vault_kv_field_access_key,
+      vault_kv_field_secret_key = var.vault_secret.vault_kv_field_secret_key
+  })
+  detach = false
+}
diff --git a/fdio.infra.terraform/terraform-nomad-nomad-job/variables.tf b/fdio.infra.terraform/terraform-nomad-nomad-job/variables.tf
new file mode 100644 (file)
index 0000000..86d1b45
--- /dev/null
@@ -0,0 +1,115 @@
+# Nomad
+variable "datacenters" {
+  description = "Specifies the list of DCs to be considered placing this task."
+  type        = list(string)
+  default     = ["dc1"]
+}
+
+# ETL
+variable "aws_access_key_id" {
+  description = "AWS access key."
+  type        = string
+  default     = "aws"
+}
+
+variable "aws_secret_access_key" {
+  description = "AWS secret key"
+  type        = string
+  default     = "aws"
+}
+
+variable "aws_default_region" {
+  description = "AWS region"
+  type        = string
+  default     = "aws"
+}
+
+variable "cpu" {
+  description = "Specifies the CPU required to run this task in MHz."
+  type        = number
+  default     = 10000
+}
+
+variable "cron" {
+  description = "Specifies a cron expression configuring the interval to launch."
+  type        = string
+  default     = "@daily"
+}
+
+variable "envs" {
+  description = "Specifies ETL environment variables."
+  type        = list(string)
+  default     = []
+}
+
+variable "image" {
+  description = "Specifies the Docker image to run."
+  type        = string
+  default     = "pmikus/docker-ubuntu-focal-aws-glue:latest"
+}
+
+variable "job_name" {
+  description = "Specifies a name for the job."
+  type        = string
+  default     = "etl"
+}
+
+variable "memory" {
+  description = "Specifies the memory required in MB."
+  type        = number
+  default     = 50000
+}
+
+variable "out_aws_access_key_id" {
+  description = "AWS access key."
+  type        = string
+  default     = "aws"
+}
+
+variable "out_aws_secret_access_key" {
+  description = "AWS secret key"
+  type        = string
+  default     = "aws"
+}
+
+variable "out_aws_default_region" {
+  description = "AWS region"
+  type        = string
+  default     = "aws"
+}
+
+variable "prohibit_overlap" {
+  description = "Specifies if this job should wait until previous completed."
+  type        = bool
+  default     = true
+}
+
+variable "time_zone" {
+  description = "Specifies the time zone to evaluate the next launch interval."
+  type        = string
+  default     = "UTC"
+}
+
+variable "type" {
+  description = "Specifies the Nomad scheduler to use."
+  type        = string
+  default     = "batch"
+}
+
+variable "vault_secret" {
+  type = object({
+    use_vault_provider        = bool,
+    vault_kv_policy_name      = string,
+    vault_kv_path             = string,
+    vault_kv_field_access_key = string,
+    vault_kv_field_secret_key = string
+  })
+  description = "Set of properties to be able to fetch secret from vault."
+  default = {
+    use_vault_provider        = true
+    vault_kv_policy_name      = "kv"
+    vault_kv_path             = "data/etl"
+    vault_kv_field_access_key = "access_key"
+    vault_kv_field_secret_key = "secret_key"
+  }
+}
diff --git a/fdio.infra.terraform/terraform-nomad-nomad-job/versions.tf b/fdio.infra.terraform/terraform-nomad-nomad-job/versions.tf
new file mode 100644 (file)
index 0000000..f40435f
--- /dev/null
@@ -0,0 +1,9 @@
+terraform {
+  required_providers {
+    nomad = {
+      source  = "hashicorp/nomad"
+      version = ">= 1.4.20"
+    }
+  }
+  required_version = ">= 1.5.4"
+}
similarity index 95%
rename from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-ndrpdr-rls2406.hcl.tftpl
rename to fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-device-rls2410.hcl.tftpl
@@ -1,5 +1,5 @@
 job "${job_name}" {
-  datacenters = "${datacenters}"
+  datacenters = ["${datacenters}"]
   type        = "${type}"
   periodic {
     cron             = "${cron}"
@@ -33,7 +33,7 @@ job "${job_name}" {
           "--executor-memory", "20g",
           "--executor-cores", "2",
           "--master", "local[2]",
-          "coverage_ndrpdr_rls2406.py"
+          "coverage_device_rls2410.py"
         ]
         work_dir = "/local/csit/csit.infra.etl"
       }
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-hoststack-rls2410.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-hoststack-rls2410.hcl.tftpl
new file mode 100644 (file)
index 0000000..34518d3
--- /dev/null
@@ -0,0 +1,55 @@
+job "${job_name}" {
+  datacenters = ["${datacenters}"]
+  type        = "${type}"
+  periodic {
+    cron             = "${cron}"
+    prohibit_overlap = "${prohibit_overlap}"
+    time_zone        = "${time_zone}"
+  }
+  group "${job_name}" {
+    restart {
+      mode = "fail"
+    }
+    constraint {
+      attribute = "$${attr.cpu.arch}"
+      operator  = "!="
+      value     = "arm64"
+    }
+    constraint {
+      attribute = "$${node.class}"
+      value     = "builder"
+    }
+    task "${job_name}" {
+      artifact {
+        source      = "git::https://github.com/FDio/csit"
+        destination = "local/csit"
+      }
+      driver = "docker"
+      config {
+        image   = "${image}"
+        command = "gluesparksubmit"
+        args = [
+          "--driver-memory", "20g",
+          "--executor-memory", "20g",
+          "--executor-cores", "2",
+          "--master", "local[2]",
+          "coverage_hoststack_rls2410.py"
+        ]
+        work_dir = "/local/csit/csit.infra.etl"
+      }
+      env {
+        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
+        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
+        AWS_DEFAULT_REGION        = "${aws_default_region}"
+        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
+        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
+        ${ envs }
+      }
+      resources {
+        cpu    = ${cpu}
+        memory = ${memory}
+      }
+    }
+  }
+}
similarity index 95%
rename from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-soak-rls2406.hcl.tftpl
rename to fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-mrr-rls2410.hcl.tftpl
@@ -1,5 +1,5 @@
 job "${job_name}" {
-  datacenters = "${datacenters}"
+  datacenters = ["${datacenters}"]
   type        = "${type}"
   periodic {
     cron             = "${cron}"
@@ -33,7 +33,7 @@ job "${job_name}" {
           "--executor-memory", "20g",
           "--executor-cores", "2",
           "--master", "local[2]",
-          "coverage_soak_rls2406.py"
+          "coverage_mrr_rls2410.py"
         ]
         work_dir = "/local/csit/csit.infra.etl"
       }
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-ndrpdr-rls2410.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-ndrpdr-rls2410.hcl.tftpl
new file mode 100644 (file)
index 0000000..49569f2
--- /dev/null
@@ -0,0 +1,55 @@
+job "${job_name}" {
+  datacenters = ["${datacenters}"]
+  type        = "${type}"
+  periodic {
+    cron             = "${cron}"
+    prohibit_overlap = "${prohibit_overlap}"
+    time_zone        = "${time_zone}"
+  }
+  group "${job_name}" {
+    restart {
+      mode = "fail"
+    }
+    constraint {
+      attribute = "$${attr.cpu.arch}"
+      operator  = "!="
+      value     = "arm64"
+    }
+    constraint {
+      attribute = "$${node.class}"
+      value     = "builder"
+    }
+    task "${job_name}" {
+      artifact {
+        source      = "git::https://github.com/FDio/csit"
+        destination = "local/csit"
+      }
+      driver = "docker"
+      config {
+        image   = "${image}"
+        command = "gluesparksubmit"
+        args = [
+          "--driver-memory", "20g",
+          "--executor-memory", "20g",
+          "--executor-cores", "2",
+          "--master", "local[2]",
+          "coverage_ndrpdr_rls2410.py"
+        ]
+        work_dir = "/local/csit/csit.infra.etl"
+      }
+      env {
+        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
+        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
+        AWS_DEFAULT_REGION        = "${aws_default_region}"
+        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
+        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
+        ${ envs }
+      }
+      resources {
+        cpu    = ${cpu}
+        memory = ${memory}
+      }
+    }
+  }
+}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-reconf-rls2410.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-reconf-rls2410.hcl.tftpl
new file mode 100644 (file)
index 0000000..ad973cc
--- /dev/null
@@ -0,0 +1,55 @@
+job "${job_name}" {
+  datacenters = ["${datacenters}"]
+  type        = "${type}"
+  periodic {
+    cron             = "${cron}"
+    prohibit_overlap = "${prohibit_overlap}"
+    time_zone        = "${time_zone}"
+  }
+  group "${job_name}" {
+    restart {
+      mode = "fail"
+    }
+    constraint {
+      attribute = "$${attr.cpu.arch}"
+      operator  = "!="
+      value     = "arm64"
+    }
+    constraint {
+      attribute = "$${node.class}"
+      value     = "builder"
+    }
+    task "${job_name}" {
+      artifact {
+        source      = "git::https://github.com/FDio/csit"
+        destination = "local/csit"
+      }
+      driver = "docker"
+      config {
+        image   = "${image}"
+        command = "gluesparksubmit"
+        args = [
+          "--driver-memory", "20g",
+          "--executor-memory", "20g",
+          "--executor-cores", "2",
+          "--master", "local[2]",
+          "coverage_reconf_rls2410.py"
+        ]
+        work_dir = "/local/csit/csit.infra.etl"
+      }
+      env {
+        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
+        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
+        AWS_DEFAULT_REGION        = "${aws_default_region}"
+        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
+        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
+        ${ envs }
+      }
+      resources {
+        cpu    = ${cpu}
+        memory = ${memory}
+      }
+    }
+  }
+}
similarity index 95%
rename from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-device-rls2406.hcl.tftpl
rename to fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-soak-rls2410.hcl.tftpl
@@ -1,5 +1,5 @@
 job "${job_name}" {
-  datacenters = "${datacenters}"
+  datacenters = ["${datacenters}"]
   type        = "${type}"
   periodic {
     cron             = "${cron}"
@@ -33,7 +33,7 @@ job "${job_name}" {
           "--executor-memory", "20g",
           "--executor-cores", "2",
           "--master", "local[2]",
-          "coverage_device_rls2406.py"
+          "coverage_soak_rls2410.py"
         ]
         work_dir = "/local/csit/csit.infra.etl"
       }
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-hoststack-rls2406.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-hoststack-rls2406.hcl.tftpl
deleted file mode 100644 (file)
index 534fa04..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-job "${job_name}" {
-  datacenters = "${datacenters}"
-  type        = "${type}"
-  periodic {
-    cron             = "${cron}"
-    prohibit_overlap = "${prohibit_overlap}"
-    time_zone        = "${time_zone}"
-  }
-  group "${job_name}" {
-    restart {
-      mode = "fail"
-    }
-    constraint {
-      attribute = "$${attr.cpu.arch}"
-      operator  = "!="
-      value     = "arm64"
-    }
-    constraint {
-      attribute = "$${node.class}"
-      value     = "builder"
-    }
-    task "${job_name}" {
-      artifact {
-        source      = "git::https://github.com/FDio/csit"
-        destination = "local/csit"
-      }
-      driver = "docker"
-      config {
-        image   = "${image}"
-        command = "gluesparksubmit"
-        args = [
-          "--driver-memory", "20g",
-          "--executor-memory", "20g",
-          "--executor-cores", "2",
-          "--master", "local[2]",
-          "iterative_hoststack_rls2406.py"
-        ]
-        work_dir = "/local/csit/csit.infra.etl"
-      }
-      env {
-        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
-        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
-        AWS_DEFAULT_REGION        = "${aws_default_region}"
-        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
-        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
-        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
-        ${ envs }
-      }
-      resources {
-        cpu    = ${cpu}
-        memory = ${memory}
-      }
-    }
-  }
-}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-hoststack-rls2410.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-hoststack-rls2410.hcl.tftpl
new file mode 100644 (file)
index 0000000..4d7217b
--- /dev/null
@@ -0,0 +1,55 @@
+job "${job_name}" {
+  datacenters = ["${datacenters}"]
+  type        = "${type}"
+  periodic {
+    cron             = "${cron}"
+    prohibit_overlap = "${prohibit_overlap}"
+    time_zone        = "${time_zone}"
+  }
+  group "${job_name}" {
+    restart {
+      mode = "fail"
+    }
+    constraint {
+      attribute = "$${attr.cpu.arch}"
+      operator  = "!="
+      value     = "arm64"
+    }
+    constraint {
+      attribute = "$${node.class}"
+      value     = "builder"
+    }
+    task "${job_name}" {
+      artifact {
+        source      = "git::https://github.com/FDio/csit"
+        destination = "local/csit"
+      }
+      driver = "docker"
+      config {
+        image   = "${image}"
+        command = "gluesparksubmit"
+        args = [
+          "--driver-memory", "20g",
+          "--executor-memory", "20g",
+          "--executor-cores", "2",
+          "--master", "local[2]",
+          "iterative_hoststack_rls2410.py"
+        ]
+        work_dir = "/local/csit/csit.infra.etl"
+      }
+      env {
+        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
+        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
+        AWS_DEFAULT_REGION        = "${aws_default_region}"
+        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
+        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
+        ${ envs }
+      }
+      resources {
+        cpu    = ${cpu}
+        memory = ${memory}
+      }
+    }
+  }
+}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-mrr-rls2406.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-mrr-rls2406.hcl.tftpl
deleted file mode 100644 (file)
index a1992bb..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-job "${job_name}" {
-  datacenters = "${datacenters}"
-  type        = "${type}"
-  periodic {
-    cron             = "${cron}"
-    prohibit_overlap = "${prohibit_overlap}"
-    time_zone        = "${time_zone}"
-  }
-  group "${job_name}" {
-    restart {
-      mode = "fail"
-    }
-    constraint {
-      attribute = "$${attr.cpu.arch}"
-      operator  = "!="
-      value     = "arm64"
-    }
-    constraint {
-      attribute = "$${node.class}"
-      value     = "builder"
-    }
-    task "${job_name}" {
-      artifact {
-        source      = "git::https://github.com/FDio/csit"
-        destination = "local/csit"
-      }
-      driver = "docker"
-      config {
-        image   = "${image}"
-        command = "gluesparksubmit"
-        args = [
-          "--driver-memory", "20g",
-          "--executor-memory", "20g",
-          "--executor-cores", "2",
-          "--master", "local[2]",
-          "iterative_mrr_rls2406.py"
-        ]
-        work_dir = "/local/csit/csit.infra.etl"
-      }
-      env {
-        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
-        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
-        AWS_DEFAULT_REGION        = "${aws_default_region}"
-        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
-        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
-        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
-        ${ envs }
-      }
-      resources {
-        cpu    = ${cpu}
-        memory = ${memory}
-      }
-    }
-  }
-}
similarity index 95%
rename from fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-reconf-rls2406.hcl.tftpl
rename to fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-mrr-rls2410.hcl.tftpl
@@ -1,5 +1,5 @@
 job "${job_name}" {
-  datacenters = "${datacenters}"
+  datacenters = ["${datacenters}"]
   type        = "${type}"
   periodic {
     cron             = "${cron}"
@@ -33,7 +33,7 @@ job "${job_name}" {
           "--executor-memory", "20g",
           "--executor-cores", "2",
           "--master", "local[2]",
-          "coverage_reconf_rls2406.py"
+          "iterative_mrr_rls2410.py"
         ]
         work_dir = "/local/csit/csit.infra.etl"
       }
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-ndrpdr-rls2406.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-ndrpdr-rls2406.hcl.tftpl
deleted file mode 100644 (file)
index 461dad8..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-job "${job_name}" {
-  datacenters = "${datacenters}"
-  type        = "${type}"
-  periodic {
-    cron             = "${cron}"
-    prohibit_overlap = "${prohibit_overlap}"
-    time_zone        = "${time_zone}"
-  }
-  group "${job_name}" {
-    restart {
-      mode = "fail"
-    }
-    constraint {
-      attribute = "$${attr.cpu.arch}"
-      operator  = "!="
-      value     = "arm64"
-    }
-    constraint {
-      attribute = "$${node.class}"
-      value     = "builder"
-    }
-    task "${job_name}" {
-      artifact {
-        source      = "git::https://github.com/FDio/csit"
-        destination = "local/csit"
-      }
-      driver = "docker"
-      config {
-        image   = "${image}"
-        command = "gluesparksubmit"
-        args = [
-          "--driver-memory", "20g",
-          "--executor-memory", "20g",
-          "--executor-cores", "2",
-          "--master", "local[2]",
-          "iterative_ndrpdr_rls2406.py"
-        ]
-        work_dir = "/local/csit/csit.infra.etl"
-      }
-      env {
-        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
-        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
-        AWS_DEFAULT_REGION        = "${aws_default_region}"
-        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
-        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
-        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
-        ${ envs }
-      }
-      resources {
-        cpu    = ${cpu}
-        memory = ${memory}
-      }
-    }
-  }
-}
@@ -1,5 +1,5 @@
 job "${job_name}" {
-  datacenters = "${datacenters}"
+  datacenters = ["${datacenters}"]
   type        = "${type}"
   periodic {
     cron             = "${cron}"
@@ -33,7 +33,7 @@ job "${job_name}" {
           "--executor-memory", "20g",
           "--executor-cores", "2",
           "--master", "local[2]",
-          "coverage_hoststack_rls2406.py"
+          "iterative_ndrpdr_rls2410.py"
         ]
         work_dir = "/local/csit/csit.infra.etl"
       }
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-reconf-rls2406.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-reconf-rls2406.hcl.tftpl
deleted file mode 100644 (file)
index d7f5daa..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-job "${job_name}" {
-  datacenters = "${datacenters}"
-  type        = "${type}"
-  periodic {
-    cron             = "${cron}"
-    prohibit_overlap = "${prohibit_overlap}"
-    time_zone        = "${time_zone}"
-  }
-  group "${job_name}" {
-    restart {
-      mode = "fail"
-    }
-    constraint {
-      attribute = "$${attr.cpu.arch}"
-      operator  = "!="
-      value     = "arm64"
-    }
-    constraint {
-      attribute = "$${node.class}"
-      value     = "builder"
-    }
-    task "${job_name}" {
-      artifact {
-        source      = "git::https://github.com/FDio/csit"
-        destination = "local/csit"
-      }
-      driver = "docker"
-      config {
-        image   = "${image}"
-        command = "gluesparksubmit"
-        args = [
-          "--driver-memory", "20g",
-          "--executor-memory", "20g",
-          "--executor-cores", "2",
-          "--master", "local[2]",
-          "iterative_reconf_rls2406.py"
-        ]
-        work_dir = "/local/csit/csit.infra.etl"
-      }
-      env {
-        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
-        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
-        AWS_DEFAULT_REGION        = "${aws_default_region}"
-        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
-        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
-        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
-        ${ envs }
-      }
-      resources {
-        cpu    = ${cpu}
-        memory = ${memory}
-      }
-    }
-  }
-}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-reconf-rls2410.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-reconf-rls2410.hcl.tftpl
new file mode 100644 (file)
index 0000000..4337ef1
--- /dev/null
@@ -0,0 +1,55 @@
+job "${job_name}" {
+  datacenters = ["${datacenters}"]
+  type        = "${type}"
+  periodic {
+    cron             = "${cron}"
+    prohibit_overlap = "${prohibit_overlap}"
+    time_zone        = "${time_zone}"
+  }
+  group "${job_name}" {
+    restart {
+      mode = "fail"
+    }
+    constraint {
+      attribute = "$${attr.cpu.arch}"
+      operator  = "!="
+      value     = "arm64"
+    }
+    constraint {
+      attribute = "$${node.class}"
+      value     = "builder"
+    }
+    task "${job_name}" {
+      artifact {
+        source      = "git::https://github.com/FDio/csit"
+        destination = "local/csit"
+      }
+      driver = "docker"
+      config {
+        image   = "${image}"
+        command = "gluesparksubmit"
+        args = [
+          "--driver-memory", "20g",
+          "--executor-memory", "20g",
+          "--executor-cores", "2",
+          "--master", "local[2]",
+          "iterative_reconf_rls2410.py"
+        ]
+        work_dir = "/local/csit/csit.infra.etl"
+      }
+      env {
+        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
+        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
+        AWS_DEFAULT_REGION        = "${aws_default_region}"
+        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
+        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
+        ${ envs }
+      }
+      resources {
+        cpu    = ${cpu}
+        memory = ${memory}
+      }
+    }
+  }
+}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-soak-rls2406.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-soak-rls2406.hcl.tftpl
deleted file mode 100644 (file)
index 3bf8505..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-job "${job_name}" {
-  datacenters = "${datacenters}"
-  type        = "${type}"
-  periodic {
-    cron             = "${cron}"
-    prohibit_overlap = "${prohibit_overlap}"
-    time_zone        = "${time_zone}"
-  }
-  group "${job_name}" {
-    restart {
-      mode = "fail"
-    }
-    constraint {
-      attribute = "$${attr.cpu.arch}"
-      operator  = "!="
-      value     = "arm64"
-    }
-    constraint {
-      attribute = "$${node.class}"
-      value     = "builder"
-    }
-    task "${job_name}" {
-      artifact {
-        source      = "git::https://github.com/FDio/csit"
-        destination = "local/csit"
-      }
-      driver = "docker"
-      config {
-        image   = "${image}"
-        command = "gluesparksubmit"
-        args = [
-          "--driver-memory", "20g",
-          "--executor-memory", "20g",
-          "--executor-cores", "2",
-          "--master", "local[2]",
-          "iterative_soak_rls2406.py"
-        ]
-        work_dir = "/local/csit/csit.infra.etl"
-      }
-      env {
-        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
-        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
-        AWS_DEFAULT_REGION        = "${aws_default_region}"
-        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
-        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
-        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
-        ${ envs }
-      }
-      resources {
-        cpu    = ${cpu}
-        memory = ${memory}
-      }
-    }
-  }
-}
@@ -1,5 +1,5 @@
 job "${job_name}" {
-  datacenters = "${datacenters}"
+  datacenters = ["${datacenters}"]
   type        = "${type}"
   periodic {
     cron             = "${cron}"
@@ -33,7 +33,7 @@ job "${job_name}" {
           "--executor-memory", "20g",
           "--executor-cores", "2",
           "--master", "local[2]",
-          "coverage_mrr_rls2406.py"
+          "iterative_soak_rls2410.py"
         ]
         work_dir = "/local/csit/csit.infra.etl"
       }
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/main.tf b/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/main.tf
deleted file mode 100644 (file)
index 026ab16..0000000
+++ /dev/null
@@ -1,292 +0,0 @@
-data "vault_kv_secret_v2" "fdio_logs" {
-  mount = "kv"
-  name  = "etl/fdio_logs"
-}
-
-data "vault_kv_secret_v2" "fdio_docs" {
-  mount = "kv"
-  name  = "etl/fdio_docs"
-}
-
-#data "vault_kv_secret_v2" "fdio_logs" {
-#  path = "kv/data/etl/fdio_logs"
-#}
-#
-#data "vault_kv_secret_v2" "fdio_docs" {
-#  path = "kv/data/etl/fdio_docs"
-#}
-
-module "etl-stats" {
-  providers = {
-    nomad = nomad.yul1
-  }
-  source = "../"
-
-  aws_access_key_id         = data.vault_kv_secret_v2.fdio_logs.data.access_key
-  aws_secret_access_key     = data.vault_kv_secret_v2.fdio_logs.data.secret_key
-  aws_default_region        = data.vault_kv_secret_v2.fdio_logs.data.region
-  out_aws_access_key_id     = data.vault_kv_secret_v2.fdio_docs.data.access_key
-  out_aws_secret_access_key = data.vault_kv_secret_v2.fdio_docs.data.secret_key
-  out_aws_default_region    = data.vault_kv_secret_v2.fdio_docs.data.region
-  cron                      = "0 30 0 * * * *"
-  datacenters               = ["yul1"]
-  job_name                  = "etl-stats"
-}
-
-module "etl-trending-hoststack" {
-  providers = {
-    nomad = nomad.yul1
-  }
-  source = "../"
-
-  aws_access_key_id         = data.vault_kv_secret_v2.fdio_logs.data.access_key
-  aws_secret_access_key     = data.vault_kv_secret_v2.fdio_logs.data.secret_key
-  aws_default_region        = data.vault_kv_secret_v2.fdio_logs.data.region
-  out_aws_access_key_id     = data.vault_kv_secret_v2.fdio_docs.data.access_key
-  out_aws_secret_access_key = data.vault_kv_secret_v2.fdio_docs.data.secret_key
-  out_aws_default_region    = data.vault_kv_secret_v2.fdio_docs.data.region
-  cron                      = "0 30 0 * * * *"
-  datacenters               = ["yul1"]
-  job_name                  = "etl-trending-hoststack"
-}
-
-module "etl-trending-mrr" {
-  providers = {
-    nomad = nomad.yul1
-  }
-  source = "../"
-
-  aws_access_key_id         = data.vault_kv_secret_v2.fdio_logs.data.access_key
-  aws_secret_access_key     = data.vault_kv_secret_v2.fdio_logs.data.secret_key
-  aws_default_region        = data.vault_kv_secret_v2.fdio_logs.data.region
-  out_aws_access_key_id     = data.vault_kv_secret_v2.fdio_docs.data.access_key
-  out_aws_secret_access_key = data.vault_kv_secret_v2.fdio_docs.data.secret_key
-  out_aws_default_region    = data.vault_kv_secret_v2.fdio_docs.data.region
-  cron                      = "0 30 0 * * * *"
-  datacenters               = ["yul1"]
-  job_name                  = "etl-trending-mrr"
-  memory                    = 60000
-}
-
-module "etl-trending-ndrpdr" {
-  providers = {
-    nomad = nomad.yul1
-  }
-  source = "../"
-
-  aws_access_key_id         = data.vault_kv_secret_v2.fdio_logs.data.access_key
-  aws_secret_access_key     = data.vault_kv_secret_v2.fdio_logs.data.secret_key
-  aws_default_region        = data.vault_kv_secret_v2.fdio_logs.data.region
-  out_aws_access_key_id     = data.vault_kv_secret_v2.fdio_docs.data.access_key
-  out_aws_secret_access_key = data.vault_kv_secret_v2.fdio_docs.data.secret_key
-  out_aws_default_region    = data.vault_kv_secret_v2.fdio_docs.data.region
-  cron                      = "0 30 0 * * * *"
-  datacenters               = ["yul1"]
-  job_name                  = "etl-trending-ndrpdr"
-  memory                    = 60000
-}
-
-module "etl-trending-soak" {
-  providers = {
-    nomad = nomad.yul1
-  }
-  source = "../"
-
-  aws_access_key_id         = data.vault_kv_secret_v2.fdio_logs.data.access_key
-  aws_secret_access_key     = data.vault_kv_secret_v2.fdio_logs.data.secret_key
-  aws_default_region        = data.vault_kv_secret_v2.fdio_logs.data.region
-  out_aws_access_key_id     = data.vault_kv_secret_v2.fdio_docs.data.access_key
-  out_aws_secret_access_key = data.vault_kv_secret_v2.fdio_docs.data.secret_key
-  out_aws_default_region    = data.vault_kv_secret_v2.fdio_docs.data.region
-  cron                      = "0 30 0 * * * *"
-  datacenters               = ["yul1"]
-  job_name                  = "etl-trending-soak"
-  memory                    = 60000
-}
-
-#module "etl-iterative-hoststack-rls2406" {
-#  providers = {
-#    nomad = nomad.yul1
-#  }
-#  source = "../"
-#
-#  aws_access_key_id         = data.vault_kv_secret_v2.fdio_logs.data["access_key"]
-#  aws_secret_access_key     = data.vault_kv_secret_v2.fdio_logs.data["secret_key"]
-#  aws_default_region        = data.vault_kv_secret_v2.fdio_logs.data["region"]
-#  out_aws_access_key_id     = data.vault_kv_secret_v2.fdio_docs.data["access_key"]
-#  out_aws_secret_access_key = data.vault_kv_secret_v2.fdio_docs.data["secret_key"]
-#  out_aws_default_region    = data.vault_kv_secret_v2.fdio_docs.data["region"]
-#  cron                      = "0 30 0 * * * *"
-#  datacenters               = ["yul1"]
-#  job_name                  = "etl-iterative-hoststack-rls2406"
-#}
-#
-#module "etl-iterative-mrr-rls2406" {
-#  providers = {
-#    nomad = nomad.yul1
-#  }
-#  source = "../"
-#
-#  aws_access_key_id         = data.vault_kv_secret_v2.fdio_logs.data["access_key"]
-#  aws_secret_access_key     = data.vault_kv_secret_v2.fdio_logs.data["secret_key"]
-#  aws_default_region        = data.vault_kv_secret_v2.fdio_logs.data["region"]
-#  out_aws_access_key_id     = data.vault_kv_secret_v2.fdio_docs.data["access_key"]
-#  out_aws_secret_access_key = data.vault_kv_secret_v2.fdio_docs.data["secret_key"]
-#  out_aws_default_region    = data.vault_kv_secret_v2.fdio_docs.data["region"]
-#  cron                      = "0 30 0 * * * *"
-#  datacenters               = ["yul1"]
-#  job_name                  = "etl-iterative-mrr-rls2406"
-#}
-#
-#module "etl-iterative-ndrpdr-rls2406" {
-#  providers = {
-#    nomad = nomad.yul1
-#  }
-#  source = "../"
-#
-#  aws_access_key_id         = data.vault_kv_secret_v2.fdio_logs.data["access_key"]
-#  aws_secret_access_key     = data.vault_kv_secret_v2.fdio_logs.data["secret_key"]
-#  aws_default_region        = data.vault_kv_secret_v2.fdio_logs.data["region"]
-#  out_aws_access_key_id     = data.vault_kv_secret_v2.fdio_docs.data["access_key"]
-#  out_aws_secret_access_key = data.vault_kv_secret_v2.fdio_docs.data["secret_key"]
-#  out_aws_default_region    = data.vault_kv_secret_v2.fdio_docs.data["region"]
-#  cron                      = "0 30 0 * * * *"
-#  datacenters               = ["yul1"]
-#  job_name                  = "etl-iterative-ndrpdr-rls2406"
-#}
-#
-#module "etl-iterative-reconf-rls2406" {
-#  providers = {
-#    nomad = nomad.yul1
-#  }
-#  source = "../"
-#
-#  aws_access_key_id         = data.vault_kv_secret_v2.fdio_logs.data["access_key"]
-#  aws_secret_access_key     = data.vault_kv_secret_v2.fdio_logs.data["secret_key"]
-#  aws_default_region        = data.vault_kv_secret_v2.fdio_logs.data["region"]
-#  out_aws_access_key_id     = data.vault_kv_secret_v2.fdio_docs.data["access_key"]
-#  out_aws_secret_access_key = data.vault_kv_secret_v2.fdio_docs.data["secret_key"]
-#  out_aws_default_region    = data.vault_kv_secret_v2.fdio_docs.data["region"]
-#  cron                      = "0 30 0 * * * *"
-#  datacenters               = ["yul1"]
-#  job_name                  = "etl-iterative-reconf-rls2406"
-#}
-#
-#module "etl-iterative-soak-rls2406" {
-#  providers = {
-#    nomad = nomad.yul1
-#  }
-#  source = "../"
-#
-#  aws_access_key_id         = data.vault_kv_secret_v2.fdio_logs.data["access_key"]
-#  aws_secret_access_key     = data.vault_kv_secret_v2.fdio_logs.data["secret_key"]
-#  aws_default_region        = data.vault_kv_secret_v2.fdio_logs.data["region"]
-#  out_aws_access_key_id     = data.vault_kv_secret_v2.fdio_docs.data["access_key"]
-#  out_aws_secret_access_key = data.vault_kv_secret_v2.fdio_docs.data["secret_key"]
-#  out_aws_default_region    = data.vault_kv_secret_v2.fdio_docs.data["region"]
-#  cron                      = "0 30 0 * * * *"
-#  datacenters               = ["yul1"]
-#  job_name                  = "etl-iterative-soak-rls2406"
-#}
-#
-#module "etl-coverage-device-rls2406" {
-#  providers = {
-#    nomad = nomad.yul1
-#  }
-#  source = "../"
-#
-#  aws_access_key_id         = data.vault_kv_secret_v2.fdio_logs.data["access_key"]
-#  aws_secret_access_key     = data.vault_kv_secret_v2.fdio_logs.data["secret_key"]
-#  aws_default_region        = data.vault_kv_secret_v2.fdio_logs.data["region"]
-#  out_aws_access_key_id     = data.vault_kv_secret_v2.fdio_docs.data["access_key"]
-#  out_aws_secret_access_key = data.vault_kv_secret_v2.fdio_docs.data["secret_key"]
-#  out_aws_default_region    = data.vault_kv_secret_v2.fdio_docs.data["region"]
-#  cron                      = "0 30 0 * * * *"
-#  datacenters               = ["yul1"]
-#  job_name                  = "etl-coverage-device-rls2406"
-#}
-#
-#module "etl-coverage-hoststack-rls2406" {
-#  providers = {
-#    nomad = nomad.yul1
-#  }
-#  source = "../"
-#
-#  aws_access_key_id         = data.vault_kv_secret_v2.fdio_logs.data["access_key"]
-#  aws_secret_access_key     = data.vault_kv_secret_v2.fdio_logs.data["secret_key"]
-#  aws_default_region        = data.vault_kv_secret_v2.fdio_logs.data["region"]
-#  out_aws_access_key_id     = data.vault_kv_secret_v2.fdio_docs.data["access_key"]
-#  out_aws_secret_access_key = data.vault_kv_secret_v2.fdio_docs.data["secret_key"]
-#  out_aws_default_region    = data.vault_kv_secret_v2.fdio_docs.data["region"]
-#  cron                      = "0 30 0 * * * *"
-#  datacenters               = ["yul1"]
-#  job_name                  = "etl-coverage-hoststack-rls2406"
-#}
-#
-#module "etl-coverage-mrr-rls2406" {
-#  providers = {
-#    nomad = nomad.yul1
-#  }
-#  source = "../"
-#
-#  aws_access_key_id         = data.vault_kv_secret_v2.fdio_logs.data["access_key"]
-#  aws_secret_access_key     = data.vault_kv_secret_v2.fdio_logs.data["secret_key"]
-#  aws_default_region        = data.vault_kv_secret_v2.fdio_logs.data["region"]
-#  out_aws_access_key_id     = data.vault_kv_secret_v2.fdio_docs.data["access_key"]
-#  out_aws_secret_access_key = data.vault_kv_secret_v2.fdio_docs.data["secret_key"]
-#  out_aws_default_region    = data.vault_kv_secret_v2.fdio_docs.data["region"]
-#  cron                      = "0 30 0 * * * *"
-#  datacenters               = ["yul1"]
-#  job_name                  = "etl-coverage-mrr-rls2406"
-#}
-#
-#module "etl-coverage-ndrpdr-rls2406" {
-#  providers = {
-#    nomad = nomad.yul1
-#  }
-#  source = "../"
-#
-#  aws_access_key_id         = data.vault_kv_secret_v2.fdio_logs.data["access_key"]
-#  aws_secret_access_key     = data.vault_kv_secret_v2.fdio_logs.data["secret_key"]
-#  aws_default_region        = data.vault_kv_secret_v2.fdio_logs.data["region"]
-#  out_aws_access_key_id     = data.vault_kv_secret_v2.fdio_docs.data["access_key"]
-#  out_aws_secret_access_key = data.vault_kv_secret_v2.fdio_docs.data["secret_key"]
-#  out_aws_default_region    = data.vault_kv_secret_v2.fdio_docs.data["region"]
-#  cron                      = "0 30 0 * * * *"
-#  datacenters               = ["yul1"]
-#  job_name                  = "etl-coverage-ndrpdr-rls2406"
-#}
-#
-#module "etl-coverage-reconf-rls2406" {
-#  providers = {
-#    nomad = nomad.yul1
-#  }
-#  source = "../"
-#
-#  aws_access_key_id         = data.vault_kv_secret_v2.fdio_logs.data["access_key"]
-#  aws_secret_access_key     = data.vault_kv_secret_v2.fdio_logs.data["secret_key"]
-#  aws_default_region        = data.vault_kv_secret_v2.fdio_logs.data["region"]
-#  out_aws_access_key_id     = data.vault_kv_secret_v2.fdio_docs.data["access_key"]
-#  out_aws_secret_access_key = data.vault_kv_secret_v2.fdio_docs.data["secret_key"]
-#  out_aws_default_region    = data.vault_kv_secret_v2.fdio_docs.data["region"]
-#  cron                      = "0 30 0 * * * *"
-#  datacenters               = ["yul1"]
-#  job_name                  = "etl-coverage-reconf-rls2406"
-#}
-#
-#module "etl-coverage-soak-rls2406" {
-#  providers = {
-#    nomad = nomad.yul1
-#  }
-#  source = "../"
-#
-#  aws_access_key_id         = data.vault_kv_secret_v2.fdio_logs.data["access_key"]
-#  aws_secret_access_key     = data.vault_kv_secret_v2.fdio_logs.data["secret_key"]
-#  aws_default_region        = data.vault_kv_secret_v2.fdio_logs.data["region"]
-#  out_aws_access_key_id     = data.vault_kv_secret_v2.fdio_docs.data["access_key"]
-#  out_aws_secret_access_key = data.vault_kv_secret_v2.fdio_docs.data["secret_key"]
-#  out_aws_default_region    = data.vault_kv_secret_v2.fdio_docs.data["region"]
-#  cron                      = "0 30 0 * * * *"
-#  datacenters               = ["yul1"]
-#  job_name                  = "etl-coverage-soak-rls2406"
-#}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/variables.tf b/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/variables.tf
deleted file mode 100644 (file)
index 60298d4..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-variable "nomad_acl" {
-  description = "Nomad ACLs enabled/disabled."
-  type        = bool
-  default     = false
-}
-
-variable "nomad_provider_address" {
-  description = "FD.io Nomad cluster address."
-  type        = string
-  default     = "http://10.30.51.23:4646"
-}
-
-variable "nomad_provider_ca_file" {
-  description = "A local file path to a PEM-encoded certificate authority."
-  type        = string
-  default     = "/etc/nomad.d/ssl/nomad-ca.pem"
-}
-
-variable "nomad_provider_cert_file" {
-  description = "A local file path to a PEM-encoded certificate."
-  type        = string
-  default     = "/etc/nomad.d/ssl/nomad.pem"
-}
-
-variable "nomad_provider_key_file" {
-  description = "A local file path to a PEM-encoded private key."
-  type        = string
-  default     = "/etc/nomad.d/ssl/nomad-key.pem"
-}
-
-variable "vault_provider_address" {
-  description = "Vault cluster address."
-  type        = string
-  default     = "http://10.30.51.23:8200"
-}
-
-variable "vault_provider_skip_tls_verify" {
-  description = "Verification of the Vault server's TLS certificate."
-  type        = bool
-  default     = false
-}
-
-variable "vault_provider_token" {
-  description = "Vault root token."
-  type        = string
-  sensitive   = true
-}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/versions.tf b/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/versions.tf
deleted file mode 100644 (file)
index ffe25bb..0000000
+++ /dev/null
@@ -1,17 +0,0 @@
-terraform {
-  backend "consul" {
-    address = "10.30.51.23:8500"
-    scheme  = "http"
-    path    = "terraform/etl"
-  }
-  required_providers {
-    nomad = {
-      source  = "hashicorp/nomad"
-      version = ">= 2.3.0"
-    }
-    vault = {
-      version = ">= 4.3.0"
-    }
-  }
-  required_version = ">= 1.5.4"
-}
index cd6a9a5..57baa24 100644 (file)
@@ -1,33 +1,28 @@
-locals {
-  datacenters = join(",", var.datacenters)
-  envs        = join("\n", concat([], var.envs))
+data "vault_kv_secret_v2" "fdio_logs" {
+  mount = "kv"
+  name  = "etl/fdio_logs"
 }
 
-resource "nomad_job" "nomad_job" {
-  jobspec = templatefile(
-    "${path.module}/conf/nomad/${var.job_name}.hcl.tftpl",
-    {
-      aws_access_key_id         = var.aws_access_key_id,
-      aws_secret_access_key     = var.aws_secret_access_key,
-      aws_default_region        = var.aws_default_region
-      cpu                       = var.cpu,
-      cron                      = var.cron,
-      datacenters               = local.datacenters,
-      envs                      = local.envs,
-      image                     = var.image,
-      job_name                  = var.job_name,
-      memory                    = var.memory,
-      out_aws_access_key_id     = var.out_aws_access_key_id,
-      out_aws_secret_access_key = var.out_aws_secret_access_key,
-      out_aws_default_region    = var.out_aws_default_region
-      prohibit_overlap          = var.prohibit_overlap,
-      time_zone                 = var.time_zone,
-      type                      = var.type,
-      use_vault_provider        = var.vault_secret.use_vault_provider,
-      vault_kv_policy_name      = var.vault_secret.vault_kv_policy_name,
-      vault_kv_path             = var.vault_secret.vault_kv_path,
-      vault_kv_field_access_key = var.vault_secret.vault_kv_field_access_key,
-      vault_kv_field_secret_key = var.vault_secret.vault_kv_field_secret_key
-  })
-  detach = false
+data "vault_kv_secret_v2" "fdio_docs" {
+  mount = "kv"
+  name  = "etl/fdio_docs"
 }
+
+module "etl" {
+  for_each = { for job in var.nomad_jobs : job.job_name => job }
+  providers = {
+    nomad = nomad.yul1
+  }
+  source = "../terraform-nomad-nomad-job"
+
+  aws_access_key_id         = data.vault_kv_secret_v2.fdio_logs.data.access_key
+  aws_secret_access_key     = data.vault_kv_secret_v2.fdio_logs.data.secret_key
+  aws_default_region        = data.vault_kv_secret_v2.fdio_logs.data.region
+  out_aws_access_key_id     = data.vault_kv_secret_v2.fdio_docs.data.access_key
+  out_aws_secret_access_key = data.vault_kv_secret_v2.fdio_docs.data.secret_key
+  out_aws_default_region    = data.vault_kv_secret_v2.fdio_docs.data.region
+  cron                      = "0 30 0 * * * *"
+  datacenters               = ["yul1"]
+  job_name                  = each.key
+  memory                    = each.value.memory
+}
\ No newline at end of file
index 86d1b45..e8ddeb0 100644 (file)
-# Nomad
-variable "datacenters" {
-  description = "Specifies the list of DCs to be considered placing this task."
-  type        = list(string)
-  default     = ["dc1"]
-}
-
-# ETL
-variable "aws_access_key_id" {
-  description = "AWS access key."
-  type        = string
-  default     = "aws"
-}
-
-variable "aws_secret_access_key" {
-  description = "AWS secret key"
-  type        = string
-  default     = "aws"
-}
-
-variable "aws_default_region" {
-  description = "AWS region"
-  type        = string
-  default     = "aws"
-}
-
-variable "cpu" {
-  description = "Specifies the CPU required to run this task in MHz."
-  type        = number
-  default     = 10000
-}
-
-variable "cron" {
-  description = "Specifies a cron expression configuring the interval to launch."
-  type        = string
-  default     = "@daily"
-}
-
-variable "envs" {
-  description = "Specifies ETL environment variables."
-  type        = list(string)
-  default     = []
+variable "nomad_acl" {
+  description = "Nomad ACLs enabled/disabled."
+  type        = bool
+  default     = false
 }
 
-variable "image" {
-  description = "Specifies the Docker image to run."
+variable "nomad_provider_address" {
+  description = "FD.io Nomad cluster address."
   type        = string
-  default     = "pmikus/docker-ubuntu-focal-aws-glue:latest"
+  default     = "http://10.30.51.23:4646"
 }
 
-variable "job_name" {
-  description = "Specifies a name for the job."
+variable "nomad_provider_ca_file" {
+  description = "A local file path to a PEM-encoded certificate authority."
   type        = string
-  default     = "etl"
-}
-
-variable "memory" {
-  description = "Specifies the memory required in MB."
-  type        = number
-  default     = 50000
+  default     = "/etc/nomad.d/ssl/nomad-ca.pem"
 }
 
-variable "out_aws_access_key_id" {
-  description = "AWS access key."
+variable "nomad_provider_cert_file" {
+  description = "A local file path to a PEM-encoded certificate."
   type        = string
-  default     = "aws"
+  default     = "/etc/nomad.d/ssl/nomad.pem"
 }
 
-variable "out_aws_secret_access_key" {
-  description = "AWS secret key"
+variable "nomad_provider_key_file" {
+  description = "A local file path to a PEM-encoded private key."
   type        = string
-  default     = "aws"
+  default     = "/etc/nomad.d/ssl/nomad-key.pem"
 }
 
-variable "out_aws_default_region" {
-  description = "AWS region"
+variable "vault_provider_address" {
+  description = "Vault cluster address."
   type        = string
-  default     = "aws"
+  default     = "http://10.30.51.23:8200"
 }
 
-variable "prohibit_overlap" {
-  description = "Specifies if this job should wait until previous completed."
+variable "vault_provider_skip_tls_verify" {
+  description = "Verification of the Vault server's TLS certificate."
   type        = bool
-  default     = true
+  default     = false
 }
 
-variable "time_zone" {
-  description = "Specifies the time zone to evaluate the next launch interval."
+variable "vault_provider_token" {
+  description = "Vault root token."
   type        = string
-  default     = "UTC"
-}
-
-variable "type" {
-  description = "Specifies the Nomad scheduler to use."
-  type        = string
-  default     = "batch"
-}
-
-variable "vault_secret" {
-  type = object({
-    use_vault_provider        = bool,
-    vault_kv_policy_name      = string,
-    vault_kv_path             = string,
-    vault_kv_field_access_key = string,
-    vault_kv_field_secret_key = string
-  })
-  description = "Set of properties to be able to fetch secret from vault."
-  default = {
-    use_vault_provider        = true
-    vault_kv_policy_name      = "kv"
-    vault_kv_path             = "data/etl"
-    vault_kv_field_access_key = "access_key"
-    vault_kv_field_secret_key = "secret_key"
-  }
-}
+  sensitive   = true
+}
+
+variable "nomad_jobs" {
+  description = "List of ETL jobs"
+  type        = list(map(any))
+  default = [
+    {
+      job_name = "etl-stats"
+      memory = 50000
+    },
+    {
+      job_name = "etl-trending-hoststack"
+      memory = 50000
+    },
+    {
+      job_name = "etl-iterative-hoststack-rls2410"
+      memory = 50000
+    },
+    {
+      job_name = "etl-iterative-mrr-rls2410"
+      memory = 50000
+    },
+    {
+      job_name = "etl-iterative-ndrpdr-rls2410"
+      memory = 50000
+    },
+    {
+      job_name = "etl-iterative-reconf-rls2410"
+      memory = 50000
+    },
+    {
+      job_name = "etl-iterative-soak-rls2410"
+      memory = 50000
+    },
+    {
+      job_name = "etl-coverage-device-rls2410"
+      memory = 50000
+    },
+    {
+      job_name = "etl-coverage-hoststack-rls2410"
+      memory = 50000
+    },
+    {
+      job_name = "etl-coverage-mrr-rls2410"
+      memory = 50000
+    },
+    {
+      job_name = "etl-coverage-ndrpdr-rls2410"
+      memory = 50000
+    },
+    {
+      job_name = "etl-coverage-reconf-rls2410"
+      memory = 50000
+    },
+    {
+      job_name = "etl-coverage-soak-rls2410"
+      memory = 50000
+    },
+    {
+      job_name = "etl-trending-mrr"
+      memory = 60000
+    },
+    {
+      job_name = "etl-trending-ndrpdr"
+      memory = 60000
+    },
+    {
+      job_name = "etl-trending-soak"
+      memory = 60000
+    }
+  ]
+}
\ No newline at end of file
index f40435f..ffe25bb 100644 (file)
@@ -1,8 +1,16 @@
 terraform {
+  backend "consul" {
+    address = "10.30.51.23:8500"
+    scheme  = "http"
+    path    = "terraform/etl"
+  }
   required_providers {
     nomad = {
       source  = "hashicorp/nomad"
-      version = ">= 1.4.20"
+      version = ">= 2.3.0"
+    }
+    vault = {
+      version = ">= 4.3.0"
     }
   }
   required_version = ">= 1.5.4"