feat(terraform): Optimize ETL execution
[csit.git] / fdio.infra.terraform / terraform-nomad-pyspark-etl / conf / nomad / etl.hcl.tftpl
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl.hcl.tftpl
deleted file mode 100644 (file)
index b5b43e5..0000000
+++ /dev/null
@@ -1,597 +0,0 @@
-job "${job_name}" {
-  datacenters = "${datacenters}"
-  type        = "${type}"
-  periodic {
-    cron             = "${cron}"
-    prohibit_overlap = "${prohibit_overlap}"
-    time_zone        = "${time_zone}"
-  }
-  group "${job_name}-trending" {
-    restart {
-      mode = "fail"
-    }
-    constraint {
-      attribute       = "$${attr.cpu.arch}"
-      operator        = "!="
-      value           = "arm64"
-    }
-    constraint {
-      attribute      = "$${node.class}"
-      value          = "builder"
-    }
-    task "${job_name}-trending-mrr" {
-      artifact {
-        source      = "git::https://github.com/FDio/csit"
-        destination = "local/csit"
-      }
-      driver = "docker"
-      config {
-        image   = "${image}"
-        command = "gluesparksubmit"
-        args = [
-          "--driver-memory", "30g",
-          "--executor-memory", "30g",
-          "trending_mrr.py"
-        ]
-        work_dir = "/local/csit/csit.infra.etl"
-      }
-      env {
-        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
-        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
-        AWS_DEFAULT_REGION        = "${aws_default_region}"
-        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
-        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
-        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
-        ${ envs }
-      }
-      resources {
-        cpu    = ${cpu}
-        memory = ${memory}
-      }
-    }
-    task "${job_name}-trending-ndrpdr" {
-      artifact {
-        source      = "git::https://github.com/FDio/csit"
-        destination = "local/csit"
-      }
-      driver = "docker"
-      config {
-        image   = "${image}"
-        command = "gluesparksubmit"
-        args = [
-          "--driver-memory", "30g",
-          "--executor-memory", "30g",
-          "trending_ndrpdr.py"
-        ]
-        work_dir = "/local/csit/csit.infra.etl"
-      }
-      env {
-        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
-        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
-        AWS_DEFAULT_REGION        = "${aws_default_region}"
-        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
-        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
-        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
-        ${ envs }
-      }
-      resources {
-        cpu    = ${cpu}
-        memory = ${memory}
-      }
-    }
-    task "${job_name}-trending-soak" {
-      artifact {
-        source      = "git::https://github.com/FDio/csit"
-        destination = "local/csit"
-      }
-      driver = "docker"
-      config {
-        image   = "${image}"
-        command = "gluesparksubmit"
-        args = [
-          "--driver-memory", "30g",
-          "--executor-memory", "30g",
-          "trending_soak.py"
-        ]
-        work_dir = "/local/csit/csit.infra.etl"
-      }
-      env {
-        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
-        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
-        AWS_DEFAULT_REGION        = "${aws_default_region}"
-        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
-        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
-        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
-        ${ envs }
-      }
-      resources {
-        cpu    = ${cpu}
-        memory = ${memory}
-      }
-    }
-    task "${job_name}-trending-reconf" {
-      artifact {
-        source      = "git::https://github.com/FDio/csit"
-        destination = "local/csit"
-      }
-      driver = "docker"
-      config {
-        image   = "${image}"
-        command = "gluesparksubmit"
-        args = [
-          "--driver-memory", "30g",
-          "--executor-memory", "30g",
-          "trending_reconf.py"
-        ]
-        work_dir = "/local/csit/csit.infra.etl"
-      }
-      env {
-        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
-        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
-        AWS_DEFAULT_REGION        = "${aws_default_region}"
-        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
-        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
-        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
-        ${ envs }
-      }
-      resources {
-        cpu    = ${cpu}
-        memory = ${memory}
-      }
-    }
-    task "${job_name}-trending-hoststack" {
-      artifact {
-        source      = "git::https://github.com/FDio/csit"
-        destination = "local/csit"
-      }
-      driver = "docker"
-      config {
-        image   = "${image}"
-        command = "gluesparksubmit"
-        args = [
-          "--driver-memory", "30g",
-          "--executor-memory", "30g",
-          "trending_hoststack.py"
-        ]
-        work_dir = "/local/csit/csit.infra.etl"
-      }
-      env {
-        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
-        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
-        AWS_DEFAULT_REGION        = "${aws_default_region}"
-        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
-        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
-        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
-        ${ envs }
-      }
-      resources {
-        cpu    = ${cpu}
-        memory = ${memory}
-      }
-    }
-  }
-  group "${job_name}-stats" {
-    restart {
-      mode = "fail"
-    }
-    constraint {
-      attribute       = "$${attr.cpu.arch}"
-      operator        = "!="
-      value           = "arm64"
-    }
-    constraint {
-      attribute      = "$${node.class}"
-      value          = "builder"
-    }
-    task "${job_name}-stats" {
-      artifact {
-        source      = "git::https://github.com/FDio/csit"
-        destination = "local/csit"
-      }
-      driver = "docker"
-      config {
-        image   = "${image}"
-        command = "gluesparksubmit"
-        args = [
-          "--driver-memory", "10g",
-          "--executor-memory", "10g",
-          "stats.py"
-        ]
-        work_dir = "/local/csit/csit.infra.etl"
-      }
-      env {
-        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
-        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
-        AWS_DEFAULT_REGION        = "${aws_default_region}"
-        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
-        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
-        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
-        ${ envs }
-      }
-      resources {
-        cpu    = ${cpu}
-        memory = ${memory}
-      }
-    }
-  }
-  group "${job_name}-coverage-rls2302" {
-    restart {
-      mode = "fail"
-    }
-    constraint {
-      attribute = "$${attr.cpu.arch}"
-      operator  = "!="
-      value     = "arm64"
-    }
-    constraint {
-      attribute = "$${node.class}"
-      value     = "builder"
-    }
-    task "${job_name}-coverage-device" {
-      artifact {
-        source      = "git::https://github.com/FDio/csit"
-        destination = "local/csit"
-      }
-      driver = "docker"
-      config {
-        image   = "${image}"
-        command = "gluesparksubmit"
-        args = [
-          "--driver-memory", "20g",
-          "--executor-memory", "20g",
-          "--executor-cores", "2",
-          "--master", "local[2]",
-          "coverage_device_rls2302.py"
-        ]
-        work_dir = "/local/csit/csit.infra.etl"
-      }
-      env {
-        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
-        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
-        AWS_DEFAULT_REGION        = "${aws_default_region}"
-        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
-        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
-        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
-        ${ envs }
-      }
-      resources {
-        cpu    = ${cpu}
-        memory = ${memory}
-      }
-    }
-    task "${job_name}-coverage-hoststack" {
-      artifact {
-        source      = "git::https://github.com/FDio/csit"
-        destination = "local/csit"
-      }
-      driver = "docker"
-      config {
-        image   = "${image}"
-        command = "gluesparksubmit"
-        args = [
-          "--driver-memory", "20g",
-          "--executor-memory", "20g",
-          "--executor-cores", "2",
-          "--master", "local[2]",
-          "coverage_hoststack_rls2302.py"
-        ]
-        work_dir = "/local/csit/csit.infra.etl"
-      }
-      env {
-        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
-        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
-        AWS_DEFAULT_REGION        = "${aws_default_region}"
-        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
-        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
-        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
-        ${ envs }
-      }
-      resources {
-        cpu    = ${cpu}
-        memory = ${memory}
-      }
-    }
-    task "${job_name}-coverage-mrr" {
-      artifact {
-        source      = "git::https://github.com/FDio/csit"
-        destination = "local/csit"
-      }
-      driver = "docker"
-      config {
-        image   = "${image}"
-        command = "gluesparksubmit"
-        args = [
-          "--driver-memory", "20g",
-          "--executor-memory", "20g",
-          "--executor-cores", "2",
-          "--master", "local[2]",
-          "coverage_mrr_rls2302.py"
-        ]
-        work_dir = "/local/csit/csit.infra.etl"
-      }
-      env {
-        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
-        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
-        AWS_DEFAULT_REGION        = "${aws_default_region}"
-        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
-        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
-        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
-        ${ envs }
-      }
-      resources {
-        cpu    = ${cpu}
-        memory = ${memory}
-      }
-    }
-    task "${job_name}-coverage-ndrpdr" {
-      artifact {
-        source      = "git::https://github.com/FDio/csit"
-        destination = "local/csit"
-      }
-      driver = "docker"
-      config {
-        image   = "${image}"
-        command = "gluesparksubmit"
-        args = [
-          "--driver-memory", "20g",
-          "--executor-memory", "20g",
-          "--executor-cores", "2",
-          "--master", "local[2]",
-          "coverage_ndrpdr_rls2302.py"
-        ]
-        work_dir = "/local/csit/csit.infra.etl"
-      }
-      env {
-        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
-        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
-        AWS_DEFAULT_REGION        = "${aws_default_region}"
-        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
-        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
-        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
-        ${ envs }
-      }
-      resources {
-        cpu    = ${cpu}
-        memory = ${memory}
-      }
-    }
-    task "${job_name}-coverage-reconf" {
-      artifact {
-        source      = "git::https://github.com/FDio/csit"
-        destination = "local/csit"
-      }
-      driver = "docker"
-      config {
-        image   = "${image}"
-        command = "gluesparksubmit"
-        args = [
-          "--driver-memory", "20g",
-          "--executor-memory", "20g",
-          "--executor-cores", "2",
-          "--master", "local[2]",
-          "coverage_reconf_rls2302.py"
-        ]
-        work_dir = "/local/csit/csit.infra.etl"
-      }
-      env {
-        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
-        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
-        AWS_DEFAULT_REGION        = "${aws_default_region}"
-        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
-        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
-        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
-        ${ envs }
-      }
-      resources {
-        cpu    = ${cpu}
-        memory = ${memory}
-      }
-    }
-    task "${job_name}-coverage-soak" {
-      artifact {
-        source      = "git::https://github.com/FDio/csit"
-        destination = "local/csit"
-      }
-      driver = "docker"
-      config {
-        image   = "${image}"
-        command = "gluesparksubmit"
-        args = [
-          "--driver-memory", "20g",
-          "--executor-memory", "20g",
-          "--executor-cores", "2",
-          "--master", "local[2]",
-          "coverage_soak_rls2302.py"
-        ]
-        work_dir = "/local/csit/csit.infra.etl"
-      }
-      env {
-        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
-        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
-        AWS_DEFAULT_REGION        = "${aws_default_region}"
-        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
-        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
-        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
-        ${ envs }
-      }
-      resources {
-        cpu    = ${cpu}
-        memory = ${memory}
-      }
-    }
-  }
-  group "${job_name}-iterative-rls2302" {
-    restart {
-      mode = "fail"
-    }
-    constraint {
-      attribute = "$${attr.cpu.arch}"
-      operator  = "!="
-      value     = "arm64"
-    }
-    constraint {
-      attribute = "$${node.class}"
-      value     = "builder"
-    }
-    task "${job_name}-iterative-hoststack" {
-      artifact {
-        source      = "git::https://github.com/FDio/csit"
-        destination = "local/csit"
-      }
-      driver = "docker"
-      config {
-        image   = "${image}"
-        command = "gluesparksubmit"
-        args = [
-          "--driver-memory", "20g",
-          "--executor-memory", "20g",
-          "--executor-cores", "2",
-          "--master", "local[2]",
-          "iterative_hoststack_rls2302.py"
-        ]
-        work_dir = "/local/csit/csit.infra.etl"
-      }
-      env {
-        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
-        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
-        AWS_DEFAULT_REGION        = "${aws_default_region}"
-        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
-        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
-        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
-        ${ envs }
-      }
-      resources {
-        cpu    = ${cpu}
-        memory = ${memory}
-      }
-    }
-    task "${job_name}-iterative-mrr" {
-      artifact {
-        source      = "git::https://github.com/FDio/csit"
-        destination = "local/csit"
-      }
-      driver = "docker"
-      config {
-        image   = "${image}"
-        command = "gluesparksubmit"
-        args = [
-          "--driver-memory", "20g",
-          "--executor-memory", "20g",
-          "--executor-cores", "2",
-          "--master", "local[2]",
-          "iterative_mrr_rls2302.py"
-        ]
-        work_dir = "/local/csit/csit.infra.etl"
-      }
-      env {
-        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
-        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
-        AWS_DEFAULT_REGION        = "${aws_default_region}"
-        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
-        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
-        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
-        ${ envs }
-      }
-      resources {
-        cpu    = ${cpu}
-        memory = ${memory}
-      }
-    }
-    task "${job_name}-iterative-ndrpdr" {
-      artifact {
-        source      = "git::https://github.com/FDio/csit"
-        destination = "local/csit"
-      }
-      driver = "docker"
-      config {
-        image   = "${image}"
-        command = "gluesparksubmit"
-        args = [
-          "--driver-memory", "20g",
-          "--executor-memory", "20g",
-          "--executor-cores", "2",
-          "--master", "local[2]",
-          "iterative_ndrpdr_rls2302.py"
-        ]
-        work_dir = "/local/csit/csit.infra.etl"
-      }
-      env {
-        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
-        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
-        AWS_DEFAULT_REGION        = "${aws_default_region}"
-        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
-        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
-        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
-        ${ envs }
-      }
-      resources {
-        cpu    = ${cpu}
-        memory = ${memory}
-      }
-    }
-    task "${job_name}-iterative-reconf" {
-      artifact {
-        source      = "git::https://github.com/FDio/csit"
-        destination = "local/csit"
-      }
-      driver = "docker"
-      config {
-        image   = "${image}"
-        command = "gluesparksubmit"
-        args = [
-          "--driver-memory", "20g",
-          "--executor-memory", "20g",
-          "--executor-cores", "2",
-          "--master", "local[2]",
-          "iterative_reconf_rls2302.py"
-        ]
-        work_dir = "/local/csit/csit.infra.etl"
-      }
-      env {
-        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
-        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
-        AWS_DEFAULT_REGION        = "${aws_default_region}"
-        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
-        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
-        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
-        ${ envs }
-      }
-      resources {
-        cpu    = ${cpu}
-        memory = ${memory}
-      }
-    }
-    task "${job_name}-iterative-soak" {
-      artifact {
-        source      = "git::https://github.com/FDio/csit"
-        destination = "local/csit"
-      }
-      driver = "docker"
-      config {
-        image   = "${image}"
-        command = "gluesparksubmit"
-        args = [
-          "--driver-memory", "20g",
-          "--executor-memory", "20g",
-          "--executor-cores", "2",
-          "--master", "local[2]",
-          "iterative_soak_rls2302.py"
-        ]
-        work_dir = "/local/csit/csit.infra.etl"
-      }
-      env {
-        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
-        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
-        AWS_DEFAULT_REGION        = "${aws_default_region}"
-        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
-        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
-        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
-        ${ envs }
-      }
-      resources {
-        cpu    = ${cpu}
-        memory = ${memory}
-      }
-    }
-  }
-}