terraform-ci-infra/1n_nmd/grafana/conf/nomad/grafana.hcl

   1 job "${job_name}" {
   2   # The "region" parameter specifies the region in which to execute the job.
   3   # If omitted, this inherits the default region name of "global".
   4   # region = "global"
   5   #
   6   # The "datacenters" parameter specifies the list of datacenters which should
   7   # be considered when placing this task. This must be provided.
   8   datacenters         = "${datacenters}"
   9
  10   # The "type" parameter controls the type of job, which impacts the scheduler's
  11   # decision on placement. This configuration is optional and defaults to
  12   # "service". For a full list of job types and their differences, please see
  13   # the online documentation.
  14   #
  15   # For more information, please see the online documentation at:
  16   #
  17   #     https://www.nomadproject.io/docs/jobspec/schedulers
  18   #
  19   type                = "service"
  20
  21   update {
  22     # The "max_parallel" parameter specifies the maximum number of updates to
  23     # perform in parallel. With a value of 1, allocations are updated one at
  24     # a time.
  25     max_parallel      = 1
  26
  27     health_check      = "checks"
  28
  29     # The "min_healthy_time" parameter specifies the minimum time the allocation
  30     # must be in the healthy state before it is marked as healthy and unblocks
  31     # further allocations from being updated.
  32     min_healthy_time  = "10s"
  33
  34     # The "healthy_deadline" parameter specifies the deadline in which the
  35     # allocation must be marked as healthy after which the allocation is
  36     # automatically transitioned to unhealthy. Transitioning to unhealthy will
  37     # fail the deployment and potentially roll back the job if "auto_revert" is
  38     # set to true.
  39     healthy_deadline  = "3m"
  40
  41     # The "progress_deadline" parameter specifies the deadline in which an
  42     # allocation must be marked as healthy. The deadline begins when the first
  43     # allocation for the deployment is created and is reset whenever an allocation
  44     # as part of the deployment transitions to a healthy state. If no allocation
  45     # transitions to the healthy state before the progress deadline, the
  46     # deployment is marked as failed.
  47     progress_deadline = "10m"
  48
  49 %{ if use_canary }
  50     # The "canary" parameter specifies that changes to the job that would result
  51     # in destructive updates should create the specified number of canaries
  52     # without stopping any previous allocations. Once the operator determines the
  53     # canaries are healthy, they can be promoted which unblocks a rolling update
  54     # of the remaining allocations at a rate of "max_parallel".
  55     #
  56     # Further, setting "canary" equal to the count of the task group allows
  57     # blue/green deployments. When the job is updated, a full set of the new
  58     # version is deployed and upon promotion the old version is stopped.
  59     canary            = 1
  60
  61     # Specifies if the job should auto-promote to the canary version when all
  62     # canaries become healthy during a deployment. Defaults to false, which
  63     # means canaries must be promoted manually with the
  64     # "nomad deployment promote" command.
  65     auto_promote      = true
  66
  67     # The "auto_revert" parameter specifies if the job should auto-revert to the
  68     # last stable job on deployment failure. NOTE: this line sits inside the
  69     # use_canary conditional, so it is only rendered when canaries are enabled.
  70     auto_revert       = true
  71 %{ endif }
  72   }
  73
  74   # Job-level rescheduling policy. Because it is declared on the job, it
  75   # applies to every group within the job; if a group also declares a
  76   # reschedule stanza, the two are merged and the group's values take
  77   # precedence. Rescheduling here is unlimited, with a constant delay of
  78   # 30s between attempts.
  79   reschedule {
  80     unlimited      = true
  81     delay_function = "constant"
  82     delay          = "30s"
  83   }
  84
  85   # The "group" stanza defines a series of tasks that should be co-located on
  86   # the same Nomad client. Any task within a group will be placed on the same
  87   # client.
  88   #
  89   # For more information and examples on the "group" stanza, please see
  90   # the online documentation at:
  91   #
  92   #     https://www.nomadproject.io/docs/job-specification/group
  93   #
  94   group "prod-group1-${service_name}" {
  95     # The "count" parameter specifies how many instances of this task group
  96     # should be running. This value must be non-negative and defaults
  97     # to 1.
  98     count             = ${group_count}
  99
 100     # Restart policy for failed tasks. Restarts are performed by the client
 101     # that is running the task: up to 40 attempts per 30-minute interval,
 102     # waiting 15s between attempts, using the "delay" mode.
 103     # https://www.nomadproject.io/docs/job-specification/restart
 104     #
 105     restart {
 106       mode     = "delay"
 107       delay    = "15s"
 108       attempts = 40
 109       interval = "30m"
 110     }
 111
 112     # Placement constraint: only nodes whose CPU architecture attribute is
 113     # not "arm64" are eligible to run this group.
 114     #
 115     # For more information and examples on the "constraint" stanza, please
 116     # see the online documentation at:
 117     #
 118     #     https://www.nomadproject.io/docs/job-specification/constraint
 119     #
 120     constraint {
 121       value     = "arm64"
 122       operator  = "!="
 123       attribute = "$${attr.cpu.arch}"
 124     }
 125
 126     # The "task" stanza creates an individual unit of work, such as a Docker
 127     # container, web application, or batch processing.
 128     #
 129     # For more information and examples on the "task" stanza, please see
 130     # the online documentation at:
 131     #
 132     #     https://www.nomadproject.io/docs/job-specification/task
 133     #
 134     task "prod-task1-${service_name}" {
 135       # The "driver" parameter specifies the task driver that should be used to
 136       # run the task.
 137       driver          = "docker"
 138
 139       # The "config" stanza specifies the driver configuration, which is passed
 140       # directly to the docker driver to start the task. Each "volumes" entry
 141       # maps a file under secrets/ (written by this task's artifact and template
 142       # stanzas) to the path under /etc/grafana where the container sees it.
 143       config {
 144         image         = "${image}"
 145         dns_servers   = [ "172.17.0.1" ]
 146         volumes       = [
 147           "secrets/prometheus.yml:/etc/grafana/provisioning/datasources/prometheus.yml",
 148           "secrets/dashboards.yml:/etc/grafana/provisioning/dashboards/dashboards.yml",
 149           "secrets/grafana.ini:/etc/grafana/grafana.ini",
 150           "secrets/node_exporter.json:/etc/grafana/provisioning/dashboards/node_exporter.json",
 151           "secrets/docker_cadvisor.json:/etc/grafana/provisioning/dashboards/docker_cadvisor.json",
 152           "secrets/nomad.json:/etc/grafana/provisioning/dashboards/nomad.json",
 153           "secrets/consul.json:/etc/grafana/provisioning/dashboards/consul.json",
 154           "secrets/prometheus.json:/etc/grafana/provisioning/dashboards/prometheus.json",
 155           "secrets/blackbox_exporter_http.json:/etc/grafana/provisioning/dashboards/blackbox_exporter_http.json",
 156           "secrets/blackbox_exporter_icmp.json:/etc/grafana/provisioning/dashboards/blackbox_exporter_icmp.json"
 157         ]
 158       }
 159
 160       artifact {
 161         # Dashboard JSON: Prometheus Node Exporter
 162         destination = "secrets/"
 163         source      = "https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/node_exporter.json"
 164       }
 165
 166       artifact {
 167         # Dashboard JSON: Docker cAdvisor
 168         destination = "secrets/"
 169         source      = "https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/docker_cadvisor.json"
 170       }
 171
 172       artifact {
 173         # Dashboard JSON: Nomad
 174         destination = "secrets/"
 175         source      = "https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/nomad.json"
 176       }
 177
 178       artifact {
 179         # Dashboard JSON: Consul
 180         destination = "secrets/"
 181         source      = "https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/consul.json"
 182       }
 183
 184       artifact {
 185         # Dashboard JSON: Prometheus
 186         destination = "secrets/"
 187         source      = "https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/prometheus.json"
 188       }
 189
 190       artifact {
 191         # Dashboard JSON: Prometheus Blackbox Exporter (HTTP probes)
 192         destination = "secrets/"
 193         source      = "https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/blackbox_exporter_http.json"
 194       }
 195
 196       artifact {
 197         # Dashboard JSON: Prometheus Blackbox Exporter (ICMP probes)
 198         destination = "secrets/"
 199         source      = "https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/blackbox_exporter_icmp.json"
 200       }
 201
 202       # The "template" stanza instructs Nomad to manage a template, such as
 203       # a configuration file or script. This one renders the Grafana datasource
 204       # provisioning file that registers Prometheus as the default datasource.
 205       # Access mode is "proxy" so queries are issued by the Grafana server and
 206       # not by the viewer's browser, which may be unable to resolve the
 207       # prometheus.service.consul name. See the online documentation at:
 208       #
 209       #     https://www.nomadproject.io/docs/job-specification/template
 210       #
 211       template {
 212         change_mode   = "noop"
 213         change_signal = "SIGINT"
 214         destination   = "secrets/prometheus.yml"
 215         data          = <<EOH
 216 apiVersion: 1
 217 datasources:
 218 - name: Prometheus
 219   type: prometheus
 220   access: proxy
 221   orgId: 1
 222   url: http://prometheus.service.consul:9090
 223   basicAuth: false
 224   isDefault: true
 225   version: 1
 226   editable: false
 227 EOH
 228       }
 229
 230       template {
             # Grafana dashboard provider definition: a file-based provider that
             # loads dashboards from /etc/grafana/provisioning/dashboards.
 231         destination   = "secrets/dashboards.yml"
 232         change_signal = "SIGINT"
 233         change_mode   = "noop"
 234         data          = <<EOH
 235 apiVersion: 1
 236 providers:
 237 - name: dashboards
 238   type: file
 239   disableDeletion: false
 240   updateIntervalSeconds: 10
 241   allowUiUpdates: false
 242   options:
 243     path: /etc/grafana/provisioning/dashboards
 244     foldersFromFilesStructure: true
 245 EOH
 246       }
 247
 248       template {
             # Main Grafana configuration, written to secrets/grafana.ini. The port
             # and service_name template variables fill in the [server] section.
             #
             # NOTE(security): admin_user, admin_password and secret_key below are
             # hard-coded literals, so they are stored in clear text with this
             # file. Consider supplying them from a secret store instead -
             # TODO confirm with the owners of this deployment.
             #
             # NOTE(review): change_mode is "noop", so change_signal is presumably
             # unused - confirm against the Nomad template documentation.
 249         change_mode   = "noop"
 250         change_signal = "SIGINT"
 251         destination   = "secrets/grafana.ini"
 252         data          = <<EOH
 253 app_mode = production
 254
 255 [metrics]
 256 enabled = true
 257
 258 [server]
 259 protocol = http
 260 http_port = ${port}
 261 root_url = http://${service_name}.service.consul:${port}
 262 enable_gzip = true
 263 ;cert_file =
 264 ;cert_key =
 265
 266 [security]
 267 admin_user = grafanauser
 268 admin_password = Grafana1234
 269 secret_key = SW2YcwTIb9zpOOhoPsMm
 270
 271 [users]
 272 allow_sign_up = false
 273 allow_org_create = false
 274 auto_assign_org = true
 275 auto_assign_org_role = Viewer
 276 default_theme = dark
 277
 278 [auth.basic]
 279 enabled = true
 280
 281 [auth]
 282 disable_login_form = false
 283 disable_signout_menu = false
 284
 285 [auth.anonymous]
 286 enabled = false
 287
 288 [log]
 289 mode = console
 290 level = info
 291
 292 [log.console]
 293 level = info
 294 format = console
 295 EOH
 296       }
 297
 298       # Registers a service with Consul, named by the service_name template
 299       # variable, with an HTTP health check polling /api/health every 10s.
 300       #
 301       # For more information on the "service" stanza, see the documentation at:
 302       #
 303       #     https://www.nomadproject.io/docs/job-specification/service
 304       #
 305       service {
 306         tags = ["${service_name}$${NOMAD_ALLOC_INDEX}"]
 307         name = "${service_name}"
 308         port = "${service_name}"
 309         check {
 310           type            = "http"
 311           protocol        = "http"
 312           path            = "/api/health"
 313           name            = "Grafana Check Live"
 314           timeout         = "2s"
 315           interval        = "10s"
 316           tls_skip_verify = true
 317         }
 318       }
 319
 320       # The "resources" stanza lists what the task needs in order to run: CPU,
 321       # memory and network. Nomad uses these requirements to place the task on
 322       # a machine with enough free capacity. The cpu, mem and port values are
 323       # filled in from template variables.
 324       #
 325       # For more information and examples on the "resources" stanza, please see
 326       # the online documentation at:
 327       #
 328       #     https://www.nomadproject.io/docs/job-specification/resources
 329       #
 330       resources {
 331         memory = ${mem}
 332         cpu    = ${cpu}
 333         # The network stanza declares the task's networking requirements. Here
 334         # it reserves one static port, labelled with the service_name template
 335         # variable and numbered by the port template variable; the service
 336         # stanza of this task refers to the port by that same label.
 337         #
 338         # NOTE(review): newer Nomad documentation describes "network" as a
 339         # group-level stanza; confirm the target Nomad version still accepts it
 340         # inside "resources".
 341         #
 342         # For more information on the "network" stanza, see the documentation at:
 343         #     https://www.nomadproject.io/docs/job-specification/network
 344         #
 345         network {
 346           port "${service_name}" {
 347             static = ${port}
 348           }
 349         }
 350       }
 351     }
 352   }
 353 }