X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=fdio.infra.terraform%2Fterraform-nomad-pyspark-etl%2Fconf%2Fnomad%2Fetl.hcl.tftpl;fp=fdio.infra.terraform%2Fterraform-nomad-pyspark-etl%2Fconf%2Fnomad%2Fetl.hcl.tftpl;h=208fb0a59f0870c6b194a4a192771dbb7595c549;hb=ca81b6ccfdcee62846217f824c1e4a1610b8a950;hp=0000000000000000000000000000000000000000;hpb=e972e67afac3ab3eb785668d01d3bdf1833eade9;p=csit.git diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl.hcl.tftpl new file mode 100644 index 0000000000..208fb0a59f --- /dev/null +++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl.hcl.tftpl @@ -0,0 +1,318 @@ +job "${job_name}" { + # The "datacenters" parameter specifies the list of datacenters which should + # be considered when placing this task. This must be provided. + datacenters = "${datacenters}" + + # The "type" parameter controls the type of job, which impacts the scheduler's + # decision on placement. For a full list of job types and their differences, + # please see the online documentation. + # + # https://www.nomadproject.io/docs/jobspec/schedulers + # + type = "${type}" + + # The periodic stanza allows a job to run at fixed times, dates, or intervals. + # The easiest way to think about the periodic scheduler is "Nomad cron" or + # "distributed cron". + # + # https://www.nomadproject.io/docs/job-specification/periodic + # + periodic { + cron = "${cron}" + prohibit_overlap = "${prohibit_overlap}" + time_zone = "${time_zone}" + } + + # The "group" stanza defines a series of tasks that should be co-located on + # the same Nomad client. Any task within a group will be placed on the same + # client. + # + # https://www.nomadproject.io/docs/job-specification/group + # + group "${job_name}-master" { + # The restart stanza configures a tasks's behavior on task failure. Restarts + # happen on the client that is running the task. + # + # https://www.nomadproject.io/docs/job-specification/restart + # + restart { + mode = "fail" + } + + # The constraint allows restricting the set of eligible nodes. Constraints + # may filter on attributes or client metadata. + # + # For more information and examples on the "volume" stanza, please see + # the online documentation at: + # + # https://www.nomadproject.io/docs/job-specification/constraint + # + constraint { + attribute = "$${attr.cpu.arch}" + operator = "!=" + value = "arm64" + } + + constraint { + attribute = "$${node.class}" + value = "builder" + } + + # The "task" stanza creates an individual unit of work, such as a Docker + # container, web application, or batch processing. + # + # https://www.nomadproject.io/docs/job-specification/task.html + # + task "${job_name}-trending" { + # The artifact stanza instructs Nomad to fetch and unpack a remote + # resource, such as a file, tarball, or binary. + # + # https://www.nomadproject.io/docs/job-specification/artifact + # + artifact { + source = "git::https://github.com/FDio/csit" + destination = "local/csit" + } + + # The "driver" parameter specifies the task driver that should be used to + # run the task. + driver = "docker" + + # The "config" stanza specifies the driver configuration, which is passed + # directly to the driver to start the task. The details of configurations + # are specific to each driver, so please see specific driver + # documentation for more information. + config { + image = "${image}" + command = "gluesparksubmit" + args = [ + "--driver-memory", "20g", + "--executor-memory", "20g", + "trending.py" + ] + work_dir = "/local/csit/csit.infra.etl" + } + + # The env stanza configures a list of environment variables to populate + # the task's environment before starting. + env { + AWS_ACCESS_KEY_ID = "${aws_access_key_id}" + AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}" + AWS_DEFAULT_REGION = "${aws_default_region}" + OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}" + OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}" + OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}" + ${ envs } + } + + # The "resources" stanza describes the requirements a task needs to + # execute. Resource requirements include memory, network, cpu, and more. + # This ensures the task will execute on a machine that contains enough + # resource capacity. + # + # https://www.nomadproject.io/docs/job-specification/resources + # + resources { + cpu = ${cpu} + memory = ${memory} + } + } + task "${job_name}-stats" { + # The artifact stanza instructs Nomad to fetch and unpack a remote + # resource, such as a file, tarball, or binary. + # + # https://www.nomadproject.io/docs/job-specification/artifact + # + artifact { + source = "git::https://github.com/FDio/csit" + destination = "local/csit" + } + + # The "driver" parameter specifies the task driver that should be used to + # run the task. + driver = "docker" + + # The "config" stanza specifies the driver configuration, which is passed + # directly to the driver to start the task. The details of configurations + # are specific to each driver, so please see specific driver + # documentation for more information. + config { + image = "${image}" + command = "gluesparksubmit" + args = [ + "--driver-memory", "10g", + "--executor-memory", "10g", + "stats.py" + ] + work_dir = "/local/csit/csit.infra.etl" + } + + # The env stanza configures a list of environment variables to populate + # the task's environment before starting. + env { + AWS_ACCESS_KEY_ID = "${aws_access_key_id}" + AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}" + AWS_DEFAULT_REGION = "${aws_default_region}" + OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}" + OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}" + OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}" + ${ envs } + } + + # The "resources" stanza describes the requirements a task needs to + # execute. Resource requirements include memory, network, cpu, and more. + # This ensures the task will execute on a machine that contains enough + # resource capacity. + # + # https://www.nomadproject.io/docs/job-specification/resources + # + resources { + cpu = ${cpu} + memory = ${memory} + } + } + } + group "${job_name}-rls2202" { + # The restart stanza configures a tasks's behavior on task failure. Restarts + # happen on the client that is running the task. + # + # https://www.nomadproject.io/docs/job-specification/restart + # + restart { + mode = "fail" + } + + # The constraint allows restricting the set of eligible nodes. Constraints + # may filter on attributes or client metadata. + # + # For more information and examples on the "volume" stanza, please see + # the online documentation at: + # + # https://www.nomadproject.io/docs/job-specification/constraint + # + constraint { + attribute = "$${attr.cpu.arch}" + operator = "!=" + value = "arm64" + } + + constraint { + attribute = "$${node.class}" + value = "builder" + } + + # The "task" stanza creates an individual unit of work, such as a Docker + # container, web application, or batch processing. + # + # https://www.nomadproject.io/docs/job-specification/task.html + # + task "${job_name}-coverage" { + # The artifact stanza instructs Nomad to fetch and unpack a remote + # resource, such as a file, tarball, or binary. + # + # https://www.nomadproject.io/docs/job-specification/artifact + # + artifact { + source = "git::https://github.com/FDio/csit" + destination = "local/csit" + } + + # The "driver" parameter specifies the task driver that should be used to + # run the task. + driver = "docker" + + # The "config" stanza specifies the driver configuration, which is passed + # directly to the driver to start the task. The details of configurations + # are specific to each driver, so please see specific driver + # documentation for more information. + config { + image = "${image}" + command = "gluesparksubmit" + args = [ + "--driver-memory", "20g", + "--executor-memory", "20g", + "coverage_rls2202.py" + ] + work_dir = "/local/csit/csit.infra.etl" + } + + # The env stanza configures a list of environment variables to populate + # the task's environment before starting. + env { + AWS_ACCESS_KEY_ID = "${aws_access_key_id}" + AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}" + AWS_DEFAULT_REGION = "${aws_default_region}" + OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}" + OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}" + OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}" + ${ envs } + } + + # The "resources" stanza describes the requirements a task needs to + # execute. Resource requirements include memory, network, cpu, and more. + # This ensures the task will execute on a machine that contains enough + # resource capacity. + # + # https://www.nomadproject.io/docs/job-specification/resources + # + resources { + cpu = ${cpu} + memory = ${memory} + } + } + task "${job_name}-iterative" { + # The artifact stanza instructs Nomad to fetch and unpack a remote + # resource, such as a file, tarball, or binary. + # + # https://www.nomadproject.io/docs/job-specification/artifact + # + artifact { + source = "git::https://github.com/FDio/csit" + destination = "local/csit" + } + + # The "driver" parameter specifies the task driver that should be used to + # run the task. + driver = "docker" + + # The "config" stanza specifies the driver configuration, which is passed + # directly to the driver to start the task. The details of configurations + # are specific to each driver, so please see specific driver + # documentation for more information. + config { + image = "${image}" + command = "gluesparksubmit" + args = [ + "--driver-memory", "20g", + "--executor-memory", "20g", + "iterative_rls2202.py" + ] + work_dir = "/local/csit/csit.infra.etl" + } + + # The env stanza configures a list of environment variables to populate + # the task's environment before starting. + env { + AWS_ACCESS_KEY_ID = "${aws_access_key_id}" + AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}" + AWS_DEFAULT_REGION = "${aws_default_region}" + OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}" + OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}" + OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}" + ${ envs } + } + + # The "resources" stanza describes the requirements a task needs to + # execute. Resource requirements include memory, network, cpu, and more. + # This ensures the task will execute on a machine that contains enough + # resource capacity. + # + # https://www.nomadproject.io/docs/job-specification/resources + # + resources { + cpu = ${cpu} + memory = ${memory} + } + } + } +}