From 6f7bd1dcb877caa6bfd31ce014b3d5c098114759 Mon Sep 17 00:00:00 2001 From: Peter Mikus Date: Fri, 5 Aug 2022 11:04:47 +0000 Subject: [PATCH] fix(etl): Adjust resources Signed-off-by: Peter Mikus Change-Id: I3f4dcd24285bd162c2d9ab159a4928671b27feb4 --- .../conf/nomad/etl.hcl.tftpl | 298 ++++++++++----------- .../terraform-nomad-pyspark-etl/fdio/variables.tf | 2 +- .../terraform-nomad-pyspark-etl/fdio/versions.tf | 2 +- .../terraform-nomad-pyspark-etl/variables.tf | 4 +- 4 files changed, 153 insertions(+), 153 deletions(-) diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl.hcl.tftpl index 361ba357fa..0d0ecfa318 100644 --- a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl.hcl.tftpl +++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl.hcl.tftpl @@ -86,8 +86,8 @@ job "${job_name}" { image = "${image}" command = "gluesparksubmit" args = [ - "--driver-memory", "20g", - "--executor-memory", "20g", + "--driver-memory", "30g", + "--executor-memory", "30g", "trending.py" ] work_dir = "/local/csit/csit.infra.etl" @@ -172,151 +172,151 @@ job "${job_name}" { } } } - group "${job_name}-rls2206" { - # The restart stanza configures a tasks's behavior on task failure. Restarts - # happen on the client that is running the task. - # - # https://www.nomadproject.io/docs/job-specification/restart - # - restart { - mode = "fail" - } - - # The constraint allows restricting the set of eligible nodes. Constraints - # may filter on attributes or client metadata. 
- # - # For more information and examples on the "volume" stanza, please see - # the online documentation at: - # - # https://www.nomadproject.io/docs/job-specification/constraint - # - constraint { - attribute = "$${attr.cpu.arch}" - operator = "!=" - value = "arm64" - } - - constraint { - attribute = "$${node.class}" - value = "builder" - } - - # The "task" stanza creates an individual unit of work, such as a Docker - # container, web application, or batch processing. - # - # https://www.nomadproject.io/docs/job-specification/task.html - # - task "${job_name}-coverage" { - # The artifact stanza instructs Nomad to fetch and unpack a remote - # resource, such as a file, tarball, or binary. - # - # https://www.nomadproject.io/docs/job-specification/artifact - # - artifact { - source = "git::https://github.com/FDio/csit" - destination = "local/csit" - } - - # The "driver" parameter specifies the task driver that should be used to - # run the task. - driver = "docker" - - # The "config" stanza specifies the driver configuration, which is passed - # directly to the driver to start the task. The details of configurations - # are specific to each driver, so please see specific driver - # documentation for more information. - config { - image = "${image}" - command = "gluesparksubmit" - args = [ - "--driver-memory", "20g", - "--executor-memory", "20g", - "--executor-cores", "2", - "--master", "local[2]", - "coverage_rls2206.py" - ] - work_dir = "/local/csit/csit.infra.etl" - } - - # The env stanza configures a list of environment variables to populate - # the task's environment before starting. 
- env { - AWS_ACCESS_KEY_ID = "${aws_access_key_id}" - AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}" - AWS_DEFAULT_REGION = "${aws_default_region}" - OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}" - OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}" - OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}" - ${ envs } - } - - # The "resources" stanza describes the requirements a task needs to - # execute. Resource requirements include memory, network, cpu, and more. - # This ensures the task will execute on a machine that contains enough - # resource capacity. - # - # https://www.nomadproject.io/docs/job-specification/resources - # - resources { - cpu = ${cpu} - memory = ${memory} - } - } - task "${job_name}-iterative" { - # The artifact stanza instructs Nomad to fetch and unpack a remote - # resource, such as a file, tarball, or binary. - # - # https://www.nomadproject.io/docs/job-specification/artifact - # - artifact { - source = "git::https://github.com/FDio/csit" - destination = "local/csit" - } - - # The "driver" parameter specifies the task driver that should be used to - # run the task. - driver = "docker" - - # The "config" stanza specifies the driver configuration, which is passed - # directly to the driver to start the task. The details of configurations - # are specific to each driver, so please see specific driver - # documentation for more information. - config { - image = "${image}" - command = "gluesparksubmit" - args = [ - "--driver-memory", "20g", - "--executor-memory", "20g", - "--executor-cores", "2", - "--master", "local[2]", - "iterative_rls2206.py" - ] - work_dir = "/local/csit/csit.infra.etl" - } - - # The env stanza configures a list of environment variables to populate - # the task's environment before starting. 
- env { - AWS_ACCESS_KEY_ID = "${aws_access_key_id}" - AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}" - AWS_DEFAULT_REGION = "${aws_default_region}" - OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}" - OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}" - OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}" - ${ envs } - } - - # The "resources" stanza describes the requirements a task needs to - # execute. Resource requirements include memory, network, cpu, and more. - # This ensures the task will execute on a machine that contains enough - # resource capacity. - # - # https://www.nomadproject.io/docs/job-specification/resources - # - resources { - cpu = ${cpu} - memory = ${memory} - } - } - } +# group "${job_name}-rls2206" { +# # The restart stanza configures a task's behavior on task failure. Restarts +# # happen on the client that is running the task. +# # +# # https://www.nomadproject.io/docs/job-specification/restart +# # +# restart { +# mode = "fail" +# } +# +# # The constraint allows restricting the set of eligible nodes. Constraints +# # may filter on attributes or client metadata. +# # +# # For more information and examples on the "constraint" stanza, please see +# # the online documentation at: +# # +# # https://www.nomadproject.io/docs/job-specification/constraint +# # +# constraint { +# attribute = "$${attr.cpu.arch}" +# operator = "!=" +# value = "arm64" +# } +# +# constraint { +# attribute = "$${node.class}" +# value = "builder" +# } +# +# # The "task" stanza creates an individual unit of work, such as a Docker +# # container, web application, or batch processing. +# # +# # https://www.nomadproject.io/docs/job-specification/task.html +# # +# task "${job_name}-coverage" { +# # The artifact stanza instructs Nomad to fetch and unpack a remote +# # resource, such as a file, tarball, or binary. 
+# # +# # https://www.nomadproject.io/docs/job-specification/artifact +# # +# artifact { +# source = "git::https://github.com/FDio/csit" +# destination = "local/csit" +# } +# +# # The "driver" parameter specifies the task driver that should be used to +# # run the task. +# driver = "docker" +# +# # The "config" stanza specifies the driver configuration, which is passed +# # directly to the driver to start the task. The details of configurations +# # are specific to each driver, so please see specific driver +# # documentation for more information. +# config { +# image = "${image}" +# command = "gluesparksubmit" +# args = [ +# "--driver-memory", "20g", +# "--executor-memory", "20g", +# "--executor-cores", "2", +# "--master", "local[2]", +# "coverage_rls2206.py" +# ] +# work_dir = "/local/csit/csit.infra.etl" +# } +# +# # The env stanza configures a list of environment variables to populate +# # the task's environment before starting. +# env { +# AWS_ACCESS_KEY_ID = "${aws_access_key_id}" +# AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}" +# AWS_DEFAULT_REGION = "${aws_default_region}" +# OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}" +# OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}" +# OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}" +# ${ envs } +# } +# +# # The "resources" stanza describes the requirements a task needs to +# # execute. Resource requirements include memory, network, cpu, and more. +# # This ensures the task will execute on a machine that contains enough +# # resource capacity. +# # +# # https://www.nomadproject.io/docs/job-specification/resources +# # +# resources { +# cpu = ${cpu} +# memory = ${memory} +# } +# } +# task "${job_name}-iterative" { +# # The artifact stanza instructs Nomad to fetch and unpack a remote +# # resource, such as a file, tarball, or binary. 
+# # +# # https://www.nomadproject.io/docs/job-specification/artifact +# # +# artifact { +# source = "git::https://github.com/FDio/csit" +# destination = "local/csit" +# } +# +# # The "driver" parameter specifies the task driver that should be used to +# # run the task. +# driver = "docker" +# +# # The "config" stanza specifies the driver configuration, which is passed +# # directly to the driver to start the task. The details of configurations +# # are specific to each driver, so please see specific driver +# # documentation for more information. +# config { +# image = "${image}" +# command = "gluesparksubmit" +# args = [ +# "--driver-memory", "20g", +# "--executor-memory", "20g", +# "--executor-cores", "2", +# "--master", "local[2]", +# "iterative_rls2206.py" +# ] +# work_dir = "/local/csit/csit.infra.etl" +# } +# +# # The env stanza configures a list of environment variables to populate +# # the task's environment before starting. +# env { +# AWS_ACCESS_KEY_ID = "${aws_access_key_id}" +# AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}" +# AWS_DEFAULT_REGION = "${aws_default_region}" +# OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}" +# OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}" +# OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}" +# ${ envs } +# } +# +# # The "resources" stanza describes the requirements a task needs to +# # execute. Resource requirements include memory, network, cpu, and more. +# # This ensures the task will execute on a machine that contains enough +# # resource capacity. 
+# # +# # https://www.nomadproject.io/docs/job-specification/resources +# # +# resources { +# cpu = ${cpu} +# memory = ${memory} +# } +# } +# } } diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/variables.tf b/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/variables.tf index 976fbf849e..31c462632b 100644 --- a/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/variables.tf +++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/variables.tf @@ -7,7 +7,7 @@ variable "nomad_acl" { variable "nomad_provider_address" { description = "FD.io Nomad cluster address." type = string - default = "http://10.32.8.14:4646" + default = "http://nomad.service.consul:4646" } variable "nomad_provider_ca_file" { diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/versions.tf b/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/versions.tf index 526e1d0df0..24e7c94564 100644 --- a/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/versions.tf +++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/versions.tf @@ -1,6 +1,6 @@ terraform { backend "consul" { - address = "10.32.8.14:8500" + address = "vault.service.consul:8500" scheme = "http" path = "terraform/etl" } diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/variables.tf b/fdio.infra.terraform/terraform-nomad-pyspark-etl/variables.tf index 3c6c12a943..9357c096f3 100644 --- a/fdio.infra.terraform/terraform-nomad-pyspark-etl/variables.tf +++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/variables.tf @@ -27,7 +27,7 @@ variable "aws_default_region" { variable "cpu" { description = "Specifies the CPU required to run this task in MHz." type = number - default = 10000 + default = 20000 } variable "cron" { @@ -57,7 +57,7 @@ variable "job_name" { variable "memory" { description = "Specifies the memory required in MB." type = number - default = 20000 + default = 60000 } variable "out_aws_access_key_id" { -- 2.16.6