fdio.infra.terraform/1n_nmd/grafana/conf/nomad/grafana.hcl

   1 job "${job_name}" {
   2   # The "region" parameter specifies the region in which to execute the job.
   3   # If omitted, this inherits the default region name of "global".
   4   # region = "global"
   5   #
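   6   # The "datacenters" parameter specifies the list of datacenters which should
   7   # be considered when placing this task. This must be provided.
       # NOTE(review): the value below is rendered as a single quoted string rather
       # than a list - confirm the Nomad jobspec parser in use accepts that form.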
   8   datacenters         = "${datacenters}"
   9
  10   # The "type" parameter controls the type of job, which impacts the scheduler's
  11   # decision on placement. This configuration is optional and defaults to
  12   # "service". For a full list of job types and their differences, please see
  13   # the online documentation.
  14   #
  15   # For more information, please see the online documentation at:
  16   #
  17   #     https://www.nomadproject.io/docs/jobspec/schedulers
  18   #
  19   type                = "service"
  20
  21   update {
  22     # The "max_parallel" parameter specifies the maximum number of updates to
  23     # perform in parallel. In this case, this specifies to update a single task
  24     # at a time.
  25     max_parallel      = 1
  26
         # The "health_check" parameter selects how allocation health is determined.
         # Per the Nomad update documentation, "checks" requires all tasks to be
         # running and their service checks to be passing.
  27     health_check      = "checks"
  28
  29     # The "min_healthy_time" parameter specifies the minimum time the allocation
  30     # must be in the healthy state before it is marked as healthy and unblocks
  31     # further allocations from being updated.
  32     min_healthy_time  = "10s"
  33
  34     # The "healthy_deadline" parameter specifies the deadline in which the
  35     # allocation must be marked as healthy after which the allocation is
  36     # automatically transitioned to unhealthy. Transitioning to unhealthy will
  37     # fail the deployment and potentially roll back the job if "auto_revert" is
  38     # set to true.
  39     healthy_deadline  = "3m"
  40
  41     # The "progress_deadline" parameter specifies the deadline in which an
  42     # allocation must be marked as healthy. The deadline begins when the first
  43     # allocation for the deployment is created and is reset whenever an allocation
  44     # as part of the deployment transitions to a healthy state. If no allocation
  45     # transitions to the healthy state before the progress deadline, the
  46     # deployment is marked as failed.
  47     progress_deadline = "10m"
  48
         # Everything between the template conditional below and its closing
         # directive (canary, auto_promote and auto_revert) is rendered only when
         # the "use_canary" template variable is true. Note that this also makes
         # auto_revert absent from the job whenever canaries are disabled.
  49 %{ if use_canary }
  50     # The "canary" parameter specifies that changes to the job that would result
  51     # in destructive updates should create the specified number of canaries
  52     # without stopping any previous allocations. Once the operator determines the
  53     # canaries are healthy, they can be promoted which unblocks a rolling update
  54     # of the remaining allocations at a rate of "max_parallel".
  55     #
  56     # Further, setting "canary" equal to the count of the task group allows
  57     # blue/green deployments. When the job is updated, a full set of the new
  58     # version is deployed and upon promotion the old version is stopped.
  59     canary            = 1
  60
  61     # Specifies if the job should auto-promote to the canary version when all
  62     # canaries become healthy during a deployment. Defaults to false which means
  63     # canaries must be manually promoted with the "nomad deployment promote"
  64     # command.
  65     auto_promote      = true
  66
  67     # The "auto_revert" parameter specifies if the job should auto-revert to the
  68     # last stable job on deployment failure. A job is marked as stable if all the
  69     # allocations as part of its deployment were marked healthy.
  70     auto_revert       = true
  71 %{ endif }
  72   }
  73
  74   # The reschedule stanza specifies the group's rescheduling strategy. If
  75   # specified at the job level, the configuration will apply to all groups
  76   # within the job. If the reschedule stanza is present on both the job and the
  77   # group, they are merged with the group stanza taking the highest precedence
  78   # and then the job.
  79   reschedule {
         # Effective policy: retry placement 30 seconds after a failure, keep that
         # delay fixed between attempts ("constant"), and never stop retrying.
  80     delay             = "30s"
  81     delay_function    = "constant"
  82     unlimited         = true
  83   }
  84
  85   # The "group" stanza defines a series of tasks that should be co-located on
  86   # the same Nomad client. Any task within a group will be placed on the same
  87   # client.
  88   #
  89   # For more information and examples on the "group" stanza, please see
  90   # the online documentation at:
  91   #
  92   #     https://www.nomadproject.io/docs/job-specification/group
  93   #
  94   group "prod-group1-${service_name}" {
  95     # The "count" parameter specifies the number of the task groups that should
  96     # be running under this group. This value must be non-negative and defaults
  97     # to 1.
  98     count             = ${group_count}
  99
 100     # The restart stanza configures a task's behavior on task failure. Restarts
 101     # happen on the client that is running the task.
 102     #
 103     # https://www.nomadproject.io/docs/job-specification/restart
 104     #
 105     restart {
           # Effective policy: allow up to 40 restarts within each 30-minute
           # interval, waiting 15 seconds before each restart. Per the Nomad restart
           # documentation, mode "delay" makes Nomad wait for the next interval
           # once the attempts are exhausted instead of failing the task.
 106       interval        = "30m"
 107       attempts        = 40
 108       delay           = "15s"
 109       mode            = "delay"
 110     }
 111
 112     # The constraint allows restricting the set of eligible nodes. Constraints
 113     # may filter on attributes or client metadata.
 114     #
 115     # For more information and examples on the "constraint" stanza, please see
 116     # the online documentation at:
 117     #
 118     #     https://www.nomadproject.io/docs/job-specification/constraint
 119     #
 120     constraint {
           # Exclude clients whose CPU architecture is arm64. The doubled dollar
           # sign is the template escape: the renderer emits a single-dollar
           # reference that Nomad, not the template engine, resolves at
           # scheduling time.
 121       attribute       = "$${attr.cpu.arch}"
 122       operator        = "!="
 123       value           = "arm64"
 124     }
 125
 126     constraint {
           # Only place on clients whose node class is "builder". No operator is
           # given, so Nomad's default comparison (equality, per the constraint
           # documentation) applies.
 127       attribute      = "$${node.class}"
 128       value          = "builder"
 129     }
 130
 131     # The "task" stanza creates an individual unit of work, such as a Docker
 132     # container, web application, or batch processing.
 133     #
 134     # For more information and examples on the "task" stanza, please see
 135     # the online documentation at:
 136     #
 137     #     https://www.nomadproject.io/docs/job-specification/task
 138     #
 139     task "prod-task1-${service_name}" {
 140       # The "driver" parameter specifies the task driver that should be used to
 141       # run the task.
 142       driver          = "docker"
 143
 144       # The "config" stanza specifies the driver configuration, which is passed
 145       # directly to the driver to start the task. The details of configurations
 146       # are specific to each driver, so please see specific driver
 147       # documentation for more information.
 148       config {
             # Container image to run, supplied through the "image" template variable.
 149         image         = "${image}"
             # NOTE(review): 172.17.0.1 is presumably the Docker bridge gateway on the
             # client, used to reach a host-local resolver for .consul names - confirm.
 150         dns_servers   = [ "172.17.0.1" ]
             # Bind-mount each file from the task's secrets/ directory onto the path
             # where Grafana reads it. The three configuration files are written by
             # the template stanzas of this task; the six dashboard JSON files are
             # downloaded by its artifact stanzas.
 151         volumes       = [
 152           "secrets/prometheus.yml:/etc/grafana/provisioning/datasources/prometheus.yml",
 153           "secrets/dashboards.yml:/etc/grafana/provisioning/dashboards/dashboards.yml",
 154           "secrets/grafana.ini:/etc/grafana/grafana.ini",
 155           "secrets/node_exporter.json:/etc/grafana/provisioning/dashboards/node_exporter.json",
 156           "secrets/nomad.json:/etc/grafana/provisioning/dashboards/nomad.json",
 157           "secrets/consul.json:/etc/grafana/provisioning/dashboards/consul.json",
 158           "secrets/prometheus.json:/etc/grafana/provisioning/dashboards/prometheus.json",
 159           "secrets/blackbox_exporter_http.json:/etc/grafana/provisioning/dashboards/blackbox_exporter_http.json",
 160           "secrets/blackbox_exporter_icmp.json:/etc/grafana/provisioning/dashboards/blackbox_exporter_icmp.json"
 161         ]
 162       }
 163
 164       artifact {
             # Each artifact stanza in this task downloads one Grafana dashboard
             # definition into secrets/, from where the driver config mounts it.
             # NOTE(review): all six URLs point at the mutable "main" branch and no
             # checksum option is set, so the downloaded content can change between
             # deployments - consider pinning a commit and adding a checksum.
 165         # Dashboard: Prometheus Node Exporter
 166         source        = "https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/node_exporter.json"
 167         destination   = "secrets/"
 168       }
 169
 170       artifact {
 171         # Dashboard: Nomad
 172         source        = "https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/nomad.json"
 173         destination   = "secrets/"
 174       }
 175
 176       artifact {
 177         # Dashboard: Consul
 178         source        = "https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/consul.json"
 179         destination   = "secrets/"
 180       }
 181
 182       artifact {
 183         # Dashboard: Prometheus
 184         source        = "https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/prometheus.json"
 185         destination   = "secrets/"
 186       }
 187
 188       artifact {
 189         # Dashboard: Prometheus Blackbox Exporter (HTTP probes)
 190         source        = "https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/blackbox_exporter_http.json"
 191         destination   = "secrets/"
 192       }
 193
 194       artifact {
 195         # Dashboard: Prometheus Blackbox Exporter (ICMP probes)
 196         source        = "https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/blackbox_exporter_icmp.json"
 197         destination   = "secrets/"
 198       }
 199
 200       # The "template" stanza instructs Nomad to manage a template, such as
 201       # a configuration file or script. This template can optionally pull data
 202       # from Consul or Vault to populate runtime configuration data.
 203       #
 204       # For more information and examples on the "template" stanza, please see
 205       # the online documentation at:
 206       #
 207       #     https://www.nomadproject.io/docs/job-specification/template
 208       #
 209       template {
             # Writes secrets/prometheus.yml, the Grafana datasource provisioning file.
             # It registers one default, non-editable Prometheus datasource at
             # http://prometheus.service.consul:9090 without basic auth.
             #
             # change_mode "noop" means a re-render triggers no action on the task.
             # Per the Nomad template documentation, change_signal is only used when
             # change_mode is "signal", so the SIGINT value should have no effect here.
             #
             # NOTE(review): "access: direct" makes the user's browser, not the Grafana
             # server, query the URL - confirm that browsers can resolve the .consul
             # name and that the deployed Grafana version still supports this mode;
             # "proxy" is the usual alternative.
 210         change_mode   = "noop"
 211         change_signal = "SIGINT"
 212         destination   = "secrets/prometheus.yml"
 213         data          = <<EOH
 214 apiVersion: 1
 215 datasources:
 216 - name: Prometheus
 217   type: prometheus
 218   access: direct
 219   orgId: 1
 220   url: http://prometheus.service.consul:9090
 221   basicAuth: false
 222   isDefault: true
 223   version: 1
 224   editable: false
 225 EOH
 226       }
 227
 228       template {
             # Writes secrets/dashboards.yml, the Grafana dashboard provider file.
             # It declares a single file-based provider that loads dashboards from
             # /etc/grafana/provisioning/dashboards, rescans every 10 seconds, mirrors
             # the directory layout as folders, and disallows edits from the UI.
             #
             # change_mode "noop" means a re-render triggers no action on the task.
             # Per the Nomad template documentation, change_signal is only used when
             # change_mode is "signal", so the SIGINT value should have no effect here.
 229         change_mode   = "noop"
 230         change_signal = "SIGINT"
 231         destination   = "secrets/dashboards.yml"
 232         data          = <<EOH
 233 apiVersion: 1
 234 providers:
 235 - name: dashboards
 236   type: file
 237   disableDeletion: false
 238   updateIntervalSeconds: 10
 239   allowUiUpdates: false
 240   options:
 241     path: /etc/grafana/provisioning/dashboards
 242     foldersFromFilesStructure: true
 243 EOH
 244       }
 245
 246       template {
             # Writes secrets/grafana.ini, the main Grafana server configuration.
             # The listen port and root URL come from the "port" and "service_name"
             # template variables; sign-up and anonymous access are disabled, and
             # logging goes to the console at info level.
             #
             # NOTE(review): SECURITY - admin_user, admin_password and secret_key in
             # the [security] section are hard-coded literals in this template, so
             # they are stored in plain text with the source and are identical for
             # every deployment. They should be injected instead (for example from
             # Vault or a dedicated template variable) and the committed values
             # rotated.
             #
             # change_mode "noop" means a re-render triggers no action on the task.
             # Per the Nomad template documentation, change_signal is only used when
             # change_mode is "signal", so the SIGINT value should have no effect here.
 247         change_mode   = "noop"
 248         change_signal = "SIGINT"
 249         destination   = "secrets/grafana.ini"
 250         data          = <<EOH
 251 app_mode = production
 252
 253 [metrics]
 254 enabled = true
 255
 256 [server]
 257 protocol = http
 258 http_port = ${port}
 259 root_url = http://${service_name}.service.consul:${port}
 260 enable_gzip = true
 261 ;cert_file =
 262 ;cert_key =
 263
 264 [security]
 265 admin_user = grafanauser
 266 admin_password = Grafana1234
 267 secret_key = SW2YcwTIb9zpOOhoPsMm
 268
 269 [users]
 270 allow_sign_up = false
 271 allow_org_create = false
 272 auto_assign_org = true
 273 auto_assign_org_role = Viewer
 274 default_theme = dark
 275
 276 [auth.basic]
 277 enabled = true
 278
 279 [auth]
 280 disable_login_form = false
 281 disable_signout_menu = false
 282
 283 [auth.anonymous]
 284 enabled = false
 285
 286 [log]
 287 mode = console
 288 level = info
 289
 290 [log.console]
 291 level = info
 292 format = console
 293 EOH
 294       }
 295
 296       # The service stanza instructs Nomad to register a service with Consul.
 297       #
 298       # For more information and examples on the "service" stanza, please see
 299       # the online documentation at:
 300       #
 301       #     https://www.nomadproject.io/docs/job-specification/service
 302       #
 303       service {
             # Register the task in Consul under the service name. "port" is not a
             # number but a port label: it must match the label of the port declared
             # in this task's network stanza, which is built from the same
             # "service_name" template variable.
 304         name              = "${service_name}"
 305         port              = "${service_name}"
             # One tag per instance: the service name followed by the allocation
             # index. The doubled dollar sign escapes the template engine so that
             # NOMAD_ALLOC_INDEX is resolved by Nomad at run time.
 306         tags              = [ "${service_name}$${NOMAD_ALLOC_INDEX}" ]
 307         check {
               # HTTP probe of Grafana's /api/health endpoint every 10 seconds with
               # a 2 second timeout.
               # NOTE(review): tls_skip_verify presumably has no effect while the
               # protocol is plain "http" - confirm and drop it if so.
 308           name            = "Grafana Check Live"
 309           type            = "http"
 310           protocol        = "http"
 311           tls_skip_verify = true
 312           path            = "/api/health"
 313           interval        = "10s"
 314           timeout         = "2s"
 315         }
 316       }
 317
 318       # The "resources" stanza describes the requirements a task needs to
 319       # execute. Resource requirements include memory, network, cpu, and more.
 320       # This ensures the task will execute on a machine that contains enough
 321       # resource capacity.
 322       #
 323       # For more information and examples on the "resources" stanza, please see
 324       # the online documentation at:
 325       #
 326       #     https://www.nomadproject.io/docs/job-specification/resources
 327       #
 328       resources {
             # CPU and memory reservations come from the "cpu" and "mem" template
             # variables. Per the Nomad resources documentation the units are MHz
             # and MB respectively.
 329         cpu               = ${cpu}
 330         memory            = ${mem}
 331         # The network stanza specifies the networking requirements for the task
 332         # group, including the network mode and port allocations. When scheduling
 333         # jobs in Nomad they are provisioned across your fleet of machines along
 334         # with other jobs and services. Because you don't know in advance what host
 335         # your job will be provisioned on, Nomad will provide your tasks with
 336         # network configuration when they start up.
 337         #
 338         # For more information and examples on the "network" stanza, please see
 339         # the online documentation at:
 340         #
 341         #     https://www.nomadproject.io/docs/job-specification/network
 342         #
             # NOTE(review): newer Nomad releases document the network stanza at the
             # group level and deprecate nesting it under resources - confirm against
             # the Nomad version this job targets.
 343         network {
               # Reserve a fixed host port (the "port" template variable) under a
               # label equal to the service name; the service stanza refers to this
               # label. A static port allows only one instance per client.
 344           port "${service_name}" {
 345             static        = ${port}
 346           }
 347         }
 348       }
 349     }
 350   }
 351 }