Diffstat (limited to 'fdio.infra.terraform/terraform-nomad-loki')
-rw-r--r--  fdio.infra.terraform/terraform-nomad-loki/conf/nomad/loki.hcl.tftpl | 261
-rw-r--r--  fdio.infra.terraform/terraform-nomad-loki/main.tf                   |  40
-rw-r--r--  fdio.infra.terraform/terraform-nomad-loki/variables.tf              | 127
-rw-r--r--  fdio.infra.terraform/terraform-nomad-loki/versions.tf               |   9
4 files changed, 437 insertions(+), 0 deletions(-)
diff --git a/fdio.infra.terraform/terraform-nomad-loki/conf/nomad/loki.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-loki/conf/nomad/loki.hcl.tftpl
new file mode 100644
index 0000000000..7b38437566
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-loki/conf/nomad/loki.hcl.tftpl
@@ -0,0 +1,261 @@
+job "${job_name}" {
+ # The "region" parameter specifies the region in which to execute the job.
+ # If omitted, this inherits the default region name of "global".
+ # region = "${region}"
+
+ # The "datacenters" parameter specifies the list of datacenters which should
+ # be considered when placing this task. This must be provided.
+ datacenters = "${datacenters}"
+
+ # The "type" parameter controls the type of job, which impacts the scheduler's
+ # decision on placement. This configuration is optional and defaults to
+ # "service". For a full list of job types and their differences, please see
+ # the online documentation.
+ #
+ # https://www.nomadproject.io/docs/jobspec/schedulers
+ #
+ type = "service"
+
+ update {
+ # The "max_parallel" parameter specifies the maximum number of updates to
+ # perform in parallel. In this case, this specifies to update a single task
+ # at a time.
+ max_parallel = ${max_parallel}
+
+ health_check = "checks"
+
+ # The "min_healthy_time" parameter specifies the minimum time the allocation
+ # must be in the healthy state before it is marked as healthy and unblocks
+ # further allocations from being updated.
+ min_healthy_time = "10s"
+
+ # The "healthy_deadline" parameter specifies the deadline in which the
+ # allocation must be marked as healthy after which the allocation is
+ # automatically transitioned to unhealthy. Transitioning to unhealthy will
+ # fail the deployment and potentially roll back the job if "auto_revert" is
+ # set to true.
+ healthy_deadline = "3m"
+
+ # The "progress_deadline" parameter specifies the deadline in which an
+ # allocation must be marked as healthy. The deadline begins when the first
+ # allocation for the deployment is created and is reset whenever an allocation
+ # as part of the deployment transitions to a healthy state. If no allocation
+ # transitions to the healthy state before the progress deadline, the
+ # deployment is marked as failed.
+ progress_deadline = "10m"
+
+%{ if use_canary }
+ # The "canary" parameter specifies that changes to the job that would result
+ # in destructive updates should create the specified number of canaries
+ # without stopping any previous allocations. Once the operator determines the
+ # canaries are healthy, they can be promoted which unblocks a rolling update
+ # of the remaining allocations at a rate of "max_parallel".
+ #
+ # Further, setting "canary" equal to the count of the task group allows
+ # blue/green deployments. When the job is updated, a full set of the new
+ # version is deployed and upon promotion the old version is stopped.
+ canary = ${canary}
+
+ # Specifies if the job should auto-promote to the canary version when all
+ # canaries become healthy during a deployment. Defaults to false which means
+ # canaries must be manually updated with the nomad deployment promote
+ # command.
+ auto_promote = ${auto_promote}
+
+ # The "auto_revert" parameter specifies if the job should auto-revert to the
+ # last stable job on deployment failure. A job is marked as stable if all the
+ # allocations as part of its deployment were marked healthy.
+ auto_revert = ${auto_revert}
+%{ endif }
+ }
+
+ # The "group" stanza defines a series of tasks that should be co-located on
+ # the same Nomad client. Any task within a group will be placed on the same
+ # client.
+ #
+ # https://www.nomadproject.io/docs/job-specification/group
+ #
+ group "${job_name}-group-1" {
+ # The "count" parameter specifies the number of the task groups that should
+ # be running under this group. This value must be non-negative and defaults
+ # to 1.
+ count = ${group_count}
+
+ # The volume stanza allows the group to specify that it requires a given
+ # volume from the cluster. The key of the stanza is the name of the volume
+ # as it will be exposed to task configuration.
+ #
+ # https://www.nomadproject.io/docs/job-specification/volume
+ %{ if use_host_volume }
+ volume "${job_name}-volume-1" {
+ type = "host"
+ read_only = false
+ source = "${volume_source}"
+ }
+ %{ endif }
+
+ # The restart stanza configures a task's behavior on task failure. Restarts
+ # happen on the client that is running the task.
+ #
+ # https://www.nomadproject.io/docs/job-specification/restart
+ #
+ restart {
+ interval = "30m"
+ attempts = 40
+ delay = "15s"
+ mode = "delay"
+ }
+
+ # The constraint allows restricting the set of eligible nodes. Constraints
+ # may filter on attributes or client metadata.
+ #
+ # https://www.nomadproject.io/docs/job-specification/constraint
+ #
+ constraint {
+ attribute = "$${attr.cpu.arch}"
+ operator = "!="
+ value = "arm64"
+ }
+
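+ # The default operator is "=", so this constraint restricts placement to
+ # nodes whose node class is "builder".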
+ constraint {
+ attribute = "$${node.class}"
+ value = "builder"
+ }
+
+ # The network stanza specifies the networking requirements for the task
+ # group, including the network mode and port allocations. When scheduling
+ # jobs in Nomad they are provisioned across your fleet of machines along
+ # with other jobs and services. Because you don't know in advance what host
+ # your job will be provisioned on, Nomad will provide your tasks with
+ # network configuration when they start up.
+ #
+ # https://www.nomadproject.io/docs/job-specification/network
+ #
+ network {
+ port "${service_name}" {
+ static = ${port}
+ to = ${port}
+ }
+ }
+
+ # The "task" stanza creates an individual unit of work, such as a Docker
+ # container, web application, or batch processing.
+ #
+ # https://www.nomadproject.io/docs/job-specification/task
+ #
+ task "${job_name}-task-1" {
+ # The "driver" parameter specifies the task driver that should be used to
+ # run the task.
+ driver = "exec"
+
+ %{ if use_host_volume }
+ volume_mount {
+ volume = "${job_name}-volume-1"
+ destination = "${volume_destination}"
+ read_only = false
+ }
+ %{ endif }
+
+ %{ if use_vault_provider }
+ vault {
+ policies = "${vault_kv_policy_name}"
+ }
+ %{ endif }
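+
+ # NOTE: main.tf also passes "vault_kv_path", "vault_kv_field_access_key"
+ # and "vault_kv_field_secret_key" to this template, but nothing below
+ # consumes them yet. A minimal sketch of how they could feed the S3
+ # credentials (assuming the secret exists at the given KV v2 path):
+ #
+ # template {
+ #   destination = "secrets/s3.env"
+ #   env         = true
+ #   data        = <<EOT
+ # {{ with secret "${vault_kv_path}" }}
+ # AWS_ACCESS_KEY_ID={{ .Data.data.${vault_kv_field_access_key} }}
+ # AWS_SECRET_ACCESS_KEY={{ .Data.data.${vault_kv_field_secret_key} }}
+ # {{ end }}
+ # EOT
+ # }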
+
+ # The "config" stanza specifies the driver configuration, which is passed
+ # directly to the driver to start the task. The details of configurations
+ # are specific to each driver, so please see specific driver
+ # documentation for more information.
+ config {
+ command = "local/loki-linux-amd64"
+ # Arguments passed to the command; the config file is rendered by the
+ # template stanza below.
+ args = [
+ "-config.file=secrets/loki.yml"
+ ]
+ }
+
+ # The artifact stanza instructs Nomad to fetch and unpack a remote resource,
+ # such as a file, tarball, or binary. Nomad downloads artifacts using the
+ # popular go-getter library, which permits downloading artifacts from a
+ # variety of locations using a URL as the input source.
+ #
+ # https://www.nomadproject.io/docs/job-specification/artifact
+ #
+ artifact {
+ source = "${url}"
+ }
+
+ template {
+ change_mode = "noop"
+ destination = "secrets/loki.yml"
+ data = <<EOH
+---
+auth_enabled: false
+
+server:
+ http_listen_port: 3100
+ http_listen_address: 127.0.0.1
+
+schema_config:
+ configs:
+ - from: 2020-05-15
+ store: boltdb
+ object_store: filesystem
+ schema: v11
+ index:
+ prefix: index_
+ period: 168h
+
+storage_config:
+ boltdb:
+ directory: /tmp/loki/index
+
+ filesystem:
+ directory: /tmp/loki/chunks
+
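+ # NOTE: with "object_store: filesystem" in schema_config above, this "aws"
+ # block is inert; it describes the S3-compatible endpoint that would be
+ # used if "object_store" were switched to "aws".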
+ aws:
+ bucketnames: loki
+ endpoint: http://storage.service.consul:9000
+ access_key_id: storage
+ secret_access_key: Storage1234
+ insecure: false
+ sse_encryption: false
+ http_config:
+ idle_conn_timeout: 90s
+ response_header_timeout: 0s
+ insecure_skip_verify: false
+ s3forcepathstyle: true
+EOH
+ }
+
+ # The service stanza instructs Nomad to register a service with Consul.
+ #
+ # https://www.nomadproject.io/docs/job-specification/service
+ #
+ service {
+ name = "${service_name}"
+ port = "${service_name}"
+ tags = [ "${service_name}$${NOMAD_ALLOC_INDEX}" ]
+ check {
+ name = "Loki Check Live"
+ type = "http"
+ path = "/-/healthy"
+ interval = "10s"
+ timeout = "2s"
+ }
+ }
+
+ # The "resources" stanza describes the requirements a task needs to
+ # execute. Resource requirements include memory, network, cpu, and more.
+ # This ensures the task will execute on a machine that contains enough
+ # resource capacity.
+ #
+ # https://www.nomadproject.io/docs/job-specification/resources
+ #
+ resources {
+ cpu = ${cpu}
+ memory = ${memory}
+ }
+ }
+ }
+}
diff --git a/fdio.infra.terraform/terraform-nomad-loki/main.tf b/fdio.infra.terraform/terraform-nomad-loki/main.tf
new file mode 100644
index 0000000000..a2fc70d254
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-loki/main.tf
@@ -0,0 +1,40 @@
+locals {
+ datacenters = join(",", var.datacenters)
+ url = join("",
+ [
+ "https://github.com",
+ "/grafana/loki/releases/download/v${var.gl_version}/loki-linux-amd64.zip"
+ ]
+ )
+}
+
+resource "nomad_job" "nomad_job_prometheus" {
+ jobspec = templatefile(
+ "${path.module}/conf/nomad/loki.hcl.tftpl",
+ {
+ auto_promote = var.auto_promote,
+ auto_revert = var.auto_revert,
+ canary = var.canary,
+ cpu = var.cpu,
+ datacenters = local.datacenters,
+ group_count = var.group_count,
+ job_name = var.job_name,
+ max_parallel = var.max_parallel,
+ memory = var.memory,
+ port = var.port,
+ region = var.region,
+ service_name = var.service_name,
+ url = local.url,
+ use_canary = var.use_canary,
+ use_host_volume = var.use_host_volume,
+ use_vault_provider = var.vault_secret.use_vault_provider,
+ vault_kv_policy_name = var.vault_secret.vault_kv_policy_name,
+ vault_kv_path = var.vault_secret.vault_kv_path,
+ vault_kv_field_access_key = var.vault_secret.vault_kv_field_access_key,
+ vault_kv_field_secret_key = var.vault_secret.vault_kv_field_secret_key,
+ version = var.gl_version,
+ volume_destination = var.volume_destination,
+ volume_source = var.volume_source
+ })
+ detach = false
+}
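+
+# A minimal usage sketch (module path and datacenter name are illustrative):
+#
+# module "loki" {
+#   source      = "./terraform-nomad-loki"
+#   datacenters = ["yul1"]
+#   job_name    = "loki"
+# }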
diff --git a/fdio.infra.terraform/terraform-nomad-loki/variables.tf b/fdio.infra.terraform/terraform-nomad-loki/variables.tf
new file mode 100644
index 0000000000..049290f5a8
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-loki/variables.tf
@@ -0,0 +1,127 @@
+# Nomad
+variable "datacenters" {
+ description = "Specifies the list of DCs to be considered placing this task"
+ type = list(string)
+ default = ["dc1"]
+}
+
+variable "region" {
+ description = "Specifies the list of DCs to be considered placing this task"
+ type = string
+ default = "global"
+}
+
+variable "volume_source" {
+ description = "The name of the volume to request"
+ type = string
+ default = ""
+}
+
+# Grafana Loki
+variable "gl_version" {
+ description = "Grafana Loki version"
+ type = string
+ default = "2.4.2"
+}
+
+variable "auto_promote" {
+ description = "Specifies if the job should auto-promote to the canary version"
+ type = bool
+ default = true
+}
+
+variable "auto_revert" {
+ description = "Specifies if the job should auto-revert to the last stable job"
+ type = bool
+ default = true
+}
+
+variable "canary" {
+ description = "Equal to the count of the task group allows blue/green depl."
+ type = number
+ default = 1
+}
+
+variable "cpu" {
+ description = "CPU allocation"
+ type = number
+ default = 2000
+}
+
+variable "data_dir" {
+ description = "Loki data dir allocation"
+ type = string
+ default = ""
+}
+
+variable "group_count" {
+ description = "Specifies the number of the task groups running under this one"
+ type = number
+ default = 1
+}
+
+variable "job_name" {
+ description = "Specifies a name for the job"
+ type = string
+ default = "loki"
+}
+
+variable "max_parallel" {
+ description = "Specifies the maximum number of updates to perform in parallel"
+ type = number
+ default = 1
+}
+
+variable "memory" {
+ description = "Specifies the memory required in MB"
+ type = number
+ default = 4096
+}
+
+variable "port" {
+ description = "Specifies the static TCP/UDP port to allocate"
+ type = number
+ default = 3100
+}
+
+variable "service_name" {
+ description = "Specifies the name this service will be advertised in Consul"
+ type = string
+ default = "loki"
+}
+
+variable "use_canary" {
+ description = "Uses canary deployment"
+ type = bool
+ default = true
+}
+
+variable "use_host_volume" {
+ description = "Use Nomad host volume feature"
+ type = bool
+ default = false
+}
+
+variable "volume_destination" {
+ description = "Specifies where the volume should be mounted inside the task"
+ type = string
+ default = ""
+}
+
+variable "vault_secret" {
+ type = object({
+ use_vault_provider = bool,
+ vault_kv_policy_name = string,
+ vault_kv_path = string,
+ vault_kv_field_access_key = string,
+ vault_kv_field_secret_key = string
+ })
+ description = "Set of properties to be able to fetch secret from vault."
+ default = {
+ use_vault_provider = false
+ vault_kv_policy_name = "kv"
+ vault_kv_path = "secret/data/loki"
+ vault_kv_field_access_key = "access_key"
+ vault_kv_field_secret_key = "secret_key"
+ }
+}
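+
+# Example override enabling Vault-sourced credentials (values illustrative):
+#
+# vault_secret = {
+#   use_vault_provider        = true
+#   vault_kv_policy_name      = "kv"
+#   vault_kv_path             = "secret/data/loki"
+#   vault_kv_field_access_key = "access_key"
+#   vault_kv_field_secret_key = "secret_key"
+# }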
diff --git a/fdio.infra.terraform/terraform-nomad-loki/versions.tf b/fdio.infra.terraform/terraform-nomad-loki/versions.tf
new file mode 100644
index 0000000000..a01708f28a
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-loki/versions.tf
@@ -0,0 +1,9 @@
+terraform {
+ required_providers {
+ nomad = {
+ source = "hashicorp/nomad"
+ version = ">= 1.4.16"
+ }
+ }
+ required_version = ">= 1.1.4"
+}