terraform-ci-infra/1n_nmd/grafana/conf/nomad/grafana.hcl

   1 job "${job_name}" {
   2   # Nomad job specification for Grafana, written as a Terraform template.
   3   # Single-dollar placeholders and the percent-brace directives are resolved
   4   # when the template is rendered; the double-dollar form is an escape that
   5   # leaves a runtime expression for Nomad itself to interpolate.
   6   #
   7   # The "region" parameter specifies the region in which to execute the job.
   8   # If omitted, this inherits the default region name of "global".
   9   # region = "global"
  10   #
  11   # The "datacenters" parameter specifies the list of datacenters which should
  12   # be considered when placing this task. This must be provided.
  13   #
  14   # NOTE(review): the value is rendered as one quoted string, while Nomad
  15   # documents this parameter as a list of strings - confirm that the parser
  16   # in use accepts the string form.
  17   datacenters         = "${datacenters}"
  18
  19   # The "type" parameter controls the type of job, which impacts the scheduler's
  20   # decision on placement. This configuration is optional and defaults to
  21   # "service". For a full list of job types and their differences, please see
  22   # the online documentation at:
  23   #
  24   #     https://www.nomadproject.io/docs/jobspec/schedulers
  25   #
  26   type                = "service"
  27
  28   update {
  29     # The "max_parallel" parameter specifies the maximum number of updates to
  30     # perform in parallel. In this case, this specifies to update a single task
  31     # at a time.
  32     max_parallel      = 1
  33
  34     # The "health_check" parameter specifies the mechanism by which allocation
  35     # health is determined. With "checks", all tasks must be running and their
  36     # service checks must be passing.
  37     health_check      = "checks"
  38
  39     # The "min_healthy_time" parameter specifies the minimum time the allocation
  40     # must be in the healthy state before it is marked as healthy and unblocks
  41     # further allocations from being updated.
  42     min_healthy_time  = "10s"
  43
  44     # The "healthy_deadline" parameter specifies the deadline in which the
  45     # allocation must be marked as healthy after which the allocation is
  46     # automatically transitioned to unhealthy. Transitioning to unhealthy will
  47     # fail the deployment and potentially roll back the job if "auto_revert" is
  48     # set to true.
  49     healthy_deadline  = "3m"
  50
  51     # The "progress_deadline" parameter specifies the deadline in which an
  52     # allocation must be marked as healthy. The deadline begins when the first
  53     # allocation for the deployment is created and is reset whenever an allocation
  54     # as part of the deployment transitions to a healthy state. If no allocation
  55     # transitions to the healthy state before the progress deadline, the
  56     # deployment is marked as failed.
  57     progress_deadline = "10m"
  58
  59 %{ if use_canary }
  60     # The "canary" parameter specifies that changes to the job that would result
  61     # in destructive updates should create the specified number of canaries
  62     # without stopping any previous allocations. Once the operator determines the
  63     # canaries are healthy, they can be promoted which unblocks a rolling update
  64     # of the remaining allocations at a rate of "max_parallel".
  65     #
  66     # Further, setting "canary" equal to the count of the task group allows
  67     # blue/green deployments. When the job is updated, a full set of the new
  68     # version is deployed and upon promotion the old version is stopped.
  69     canary            = 1
  70
  71     # The "auto_promote" parameter specifies if the job should auto-promote to
  72     # the canary version when all canaries become healthy during a deployment.
  73     # Defaults to false, which means canaries must be manually promoted with
  74     # the "nomad deployment promote" command.
  75     auto_promote      = true
  76
  77     # The "auto_revert" parameter specifies if the job should auto-revert to the
  78     # last stable job on deployment failure. A job is marked as stable if all the
  79     # allocations as part of its deployment were marked healthy.
  80     auto_revert       = true
  81 %{ endif }
  82   }
  83
  84   # The "group" stanza defines a series of tasks that should be co-located on
  85   # the same Nomad client. Any task within a group will be placed on the same
  86   # client.
  87   #
  88   # For more information and examples on the "group" stanza, please see
  89   # the online documentation at:
  90   #
  91   #     https://www.nomadproject.io/docs/job-specification/group
  92   #
  93   group "prod-group1-${service_name}" {
  94     # The "count" parameter specifies the number of the task groups that should
  95     # be running under this group. This value must be non-negative and defaults
  96     # to 1.
  97     count             = ${group_count}
  98
  99     # The "constraint" stanza restricts the set of eligible nodes. Constraints
 100     # may filter on attributes or client metadata. Here, nodes reporting the
 101     # "arm64" CPU architecture are excluded.
 102     #
 103     # For more information and examples on the "constraint" stanza, please see
 104     # the online documentation at:
 105     #
 106     #     https://www.nomadproject.io/docs/job-specification/constraint
 107     #
 108     constraint {
 109       attribute       = "$${attr.cpu.arch}"
 110       operator        = "!="
 111       value           = "arm64"
 112     }
 113
 114     # The "task" stanza creates an individual unit of work, such as a Docker
 115     # container, web application, or batch processing.
 116     #
 117     # For more information and examples on the "task" stanza, please see
 118     # the online documentation at:
 119     #
 120     #     https://www.nomadproject.io/docs/job-specification/task
 121     #
 122     task "prod-task1-${service_name}" {
 123       # The "driver" parameter specifies the task driver that should be used to
 124       # run the task.
 125       driver          = "docker"
 126
 127       # The "config" stanza specifies the driver configuration, which is passed
 128       # directly to the driver to start the task. The details of configurations
 129       # are specific to each driver, so please see specific driver
 130       # documentation for more information.
 131       #
 132       # The "volumes" entries bind-mount the files rendered or downloaded into
 133       # the task's "secrets/" directory (see the "artifact" and "template"
 134       # stanzas below) into the container under /etc/grafana.
 135       config {
 136         image         = "${image}"
 137         dns_servers   = [ "$${attr.unique.network.ip-address}" ]
 138         volumes       = [
 139           "secrets/prometheus.yml:/etc/grafana/provisioning/datasources/prometheus.yml",
 140           "secrets/dashboards.yml:/etc/grafana/provisioning/dashboards/dashboards.yml",
 141           "secrets/grafana.ini:/etc/grafana/grafana.ini",
 142           "secrets/node_exporter.json:/etc/grafana/provisioning/dashboards/node_exporter.json",
 143           "secrets/docker_cadvisor.json:/etc/grafana/provisioning/dashboards/docker_cadvisor.json",
 144           "secrets/nomad.json:/etc/grafana/provisioning/dashboards/nomad.json",
 145           "secrets/consul.json:/etc/grafana/provisioning/dashboards/consul.json",
 146           "secrets/prometheus.json:/etc/grafana/provisioning/dashboards/prometheus.json",
 147           "secrets/blackbox_exporter_http.json:/etc/grafana/provisioning/dashboards/blackbox_exporter_http.json",
 148           "secrets/blackbox_exporter_icmp.json:/etc/grafana/provisioning/dashboards/blackbox_exporter_icmp.json"
 149         ]
 150       }
 151
 152       # The "artifact" stanzas download the dashboard definitions into the
 153       # task's "secrets/" directory before the task starts.
 154       #
 155       # NOTE(review): the sources track the mutable "main" branch and carry no
 156       # checksum, so the downloaded content is not pinned.
 157       artifact {
 158         # Prometheus Node Exporter
 159         source        = "https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/node_exporter.json"
 160         destination   = "secrets/"
 161       }
 162
 163       artifact {
 164         # Docker cAdvisor
 165         source        = "https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/docker_cadvisor.json"
 166         destination   = "secrets/"
 167       }
 168
 169       artifact {
 170         # Nomad
 171         source        = "https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/nomad.json"
 172         destination   = "secrets/"
 173       }
 174
 175       artifact {
 176         # Consul
 177         source        = "https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/consul.json"
 178         destination   = "secrets/"
 179       }
 180
 181       artifact {
 182         # Prometheus
 183         source        = "https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/prometheus.json"
 184         destination   = "secrets/"
 185       }
 186
 187       artifact {
 188         # Prometheus Blackbox Exporter HTTP
 189         source        = "https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/blackbox_exporter_http.json"
 190         destination   = "secrets/"
 191       }
 192
 193       artifact {
 194         # Prometheus Blackbox Exporter ICMP
 195         source        = "https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/blackbox_exporter_icmp.json"
 196         destination   = "secrets/"
 197       }
 198
 199       # The "template" stanza instructs Nomad to manage a template, such as
 200       # a configuration file or script. This template can optionally pull data
 201       # from Consul or Vault to populate runtime configuration data.
 202       #
 203       # For more information and examples on the "template" stanza, please see
 204       # the online documentation at:
 205       #
 206       #     https://www.nomadproject.io/docs/job-specification/template
 207       #
 208       # The templates below use change_mode "noop": Nomad takes no action on the
 209       # task when the rendered content changes, so no "change_signal" is set.
 210       #
 211       # Grafana data source provisioning. The "proxy" access mode makes the
 212       # Grafana server issue the queries, so only the Grafana task (not each
 213       # user's browser) has to resolve and reach the Consul service address.
 214       template {
 215         change_mode   = "noop"
 216         destination   = "secrets/prometheus.yml"
 217         data          = <<EOH
 218 apiVersion: 1
 219 datasources:
 220 - name: Prometheus
 221   type: prometheus
 222   access: proxy
 223   orgId: 1
 224   url: http://prometheus.service.consul:9090
 225   basicAuth: false
 226   isDefault: true
 227   version: 1
 228   editable: false
 229 EOH
 230       }
 231
 232       # Grafana dashboard provisioning: load every dashboard file found under
 233       # the provisioning directory populated by the volume mounts above.
 234       template {
 235         change_mode   = "noop"
 236         destination   = "secrets/dashboards.yml"
 237         data          = <<EOH
 238 apiVersion: 1
 239 providers:
 240 - name: dashboards
 241   type: file
 242   disableDeletion: false
 243   updateIntervalSeconds: 10
 244   allowUiUpdates: false
 245   options:
 246     path: /etc/grafana/provisioning/dashboards
 247     foldersFromFilesStructure: true
 248 EOH
 249       }
 250
 251       # Main Grafana configuration file.
 252       #
 253       # NOTE(review): "admin_password" and "secret_key" are hard-coded in this
 254       # template. They should be supplied from a secret store or a template
 255       # variable instead and rotated, since they are readable by anyone with
 256       # access to this file.
 257       template {
 258         change_mode   = "noop"
 259         destination   = "secrets/grafana.ini"
 260         data          = <<EOH
 261 app_mode = production
 262
 263 [metrics]
 264 enabled = true
 265
 266 [server]
 267 protocol = http
 268 http_port = ${port}
 269 root_url = http://${service_name}.service.consul:${port}
 270 enable_gzip = true
 271 ;cert_file =
 272 ;cert_key =
 273
 274 [security]
 275 admin_user = grafanauser
 276 admin_password = Grafana1234
 277 secret_key = SW2YcwTIb9zpOOhoPsMm
 278
 279 [users]
 280 allow_sign_up = false
 281 allow_org_create = false
 282 auto_assign_org = true
 283 auto_assign_org_role = Viewer
 284 default_theme = dark
 285
 286 [auth.basic]
 287 enabled = true
 288
 289 [auth]
 290 disable_login_form = false
 291 disable_signout_menu = false
 292
 293 [auth.anonymous]
 294 enabled = false
 295
 296 [log]
 297 mode = console
 298 level = info
 299
 300 [log.console]
 301 level = info
 302 format = console
 303 EOH
 304       }
 305
 306       # The "service" stanza instructs Nomad to register a service with Consul.
 307       #
 308       # For more information and examples on the "service" stanza, please see
 309       # the online documentation at:
 310       #
 311       #     https://www.nomadproject.io/docs/job-specification/service
 312       #
 313       service {
 314         name              = "${service_name}"
 315         port              = "${service_name}"
 316         tags              = [ "${service_name}$${NOMAD_ALLOC_INDEX}" ]
 317         check {
 318           name            = "Grafana Check Live"
 319           type            = "http"
 320           protocol        = "http"
 321           tls_skip_verify = true
 322           path            = "/api/health"
 323           interval        = "10s"
 324           timeout         = "2s"
 325         }
 326       }
 327
 328       # The "resources" stanza describes the requirements a task needs to
 329       # execute. Resource requirements include memory, network, cpu, and more.
 330       # This ensures the task will execute on a machine that contains enough
 331       # resource capacity.
 332       #
 333       # For more information and examples on the "resources" stanza, please see
 334       # the online documentation at:
 335       #
 336       #     https://www.nomadproject.io/docs/job-specification/resources
 337       #
 338       resources {
 339         cpu               = ${cpu}
 340         memory            = ${mem}
 341
 342         # The "network" stanza specifies the networking requirements of the
 343         # task, including its port allocations. When scheduling jobs in Nomad
 344         # they are provisioned across your fleet of machines along with other
 345         # jobs and services. Because you don't know in advance what host your
 346         # job will be provisioned on, Nomad will provide your tasks with
 347         # network configuration when they start up. Here a single static port
 348         # is reserved under the label referenced by the "service" stanza.
 349         #
 350         # For more information and examples on the "network" stanza, please see
 351         # the online documentation at:
 352         #
 353         #     https://www.nomadproject.io/docs/job-specification/network
 354         #
 355         network {
 356           port "${service_name}" {
 357             static        = ${port}
 358           }
 359         }
 360       }
 361     }
 362   }
 363 }