author    | Peter Mikus <peter.mikus@protonmail.ch> | 2024-09-26 09:51:40 +0200
committer | Peter Mikus <peter.mikus@protonmail.ch> | 2024-09-26 09:51:40 +0200
commit    | 98b02c7f49efa6ef190edf2456cd090f2a859543 (patch)
tree      | 0c83956f19002e12611495ae108222cc659c9005
parent    | 2cc6c3b852350a644e3996b6ab52dbb6f7a54309 (diff)
feat(terraform): Refactor ETL (oper-240930)
Signed-off-by: Peter Mikus <peter.mikus@protonmail.ch>
Change-Id: I86711ef80304d72a701ef84737f503ee52659dc5
37 files changed, 597 insertions, 790 deletions
diff --git a/csit.infra.etl/coverage_device_rls2406.py b/csit.infra.etl/coverage_device_rls2410.py
index 30a48247c0..48746daabb 100644
--- a/csit.infra.etl/coverage_device_rls2406.py
+++ b/csit.infra.etl/coverage_device_rls2410.py
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-coverage-2406" in path]
+filtered_paths = [path for path in paths if "report-coverage-2410" in path]
 
 out_sdf = process_json_to_dataframe("device", filtered_paths)
 out_sdf.printSchema()
@@ -163,7 +163,7 @@ except KeyError:
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2406",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2410",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
diff --git a/csit.infra.etl/coverage_hoststack_rls2406.py b/csit.infra.etl/coverage_hoststack_rls2410.py
index 8a0a79b980..080e998b90 100644
--- a/csit.infra.etl/coverage_hoststack_rls2406.py
+++ b/csit.infra.etl/coverage_hoststack_rls2410.py
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-coverage-2406" in path]
+filtered_paths = [path for path in paths if "report-coverage-2410" in path]
 
 out_sdf = process_json_to_dataframe("hoststack", filtered_paths)
 out_sdf.printSchema()
@@ -163,7 +163,7 @@ except KeyError:
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2406",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2410",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
diff --git a/csit.infra.etl/coverage_mrr_rls2406.py b/csit.infra.etl/coverage_mrr_rls2410.py
index 365159d171..9400d8d7fb 100644
--- a/csit.infra.etl/coverage_mrr_rls2406.py
+++ b/csit.infra.etl/coverage_mrr_rls2410.py
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-coverage-2406" in path]
+filtered_paths = [path for path in paths if "report-coverage-2410" in path]
 
 out_sdf = process_json_to_dataframe("mrr", filtered_paths)
 out_sdf.printSchema()
@@ -163,7 +163,7 @@ except KeyError:
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2406",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2410",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
diff --git a/csit.infra.etl/coverage_ndrpdr_rls2406.py b/csit.infra.etl/coverage_ndrpdr_rls2410.py
index 8ad453e795..18b7627aa9 100644
--- a/csit.infra.etl/coverage_ndrpdr_rls2406.py
+++ b/csit.infra.etl/coverage_ndrpdr_rls2410.py
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-coverage-2406" in path]
+filtered_paths = [path for path in paths if "report-coverage-2410" in path]
 
 out_sdf = process_json_to_dataframe("ndrpdr", filtered_paths)
 out_sdf.printSchema()
@@ -163,7 +163,7 @@ except KeyError:
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2406",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2410",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
diff --git a/csit.infra.etl/coverage_reconf_rls2406.py b/csit.infra.etl/coverage_reconf_rls2410.py
index 8685809c01..4e596c24c6 100644
--- a/csit.infra.etl/coverage_reconf_rls2406.py
+++ b/csit.infra.etl/coverage_reconf_rls2410.py
@@ -140,7 +140,7 @@ paths = wr.s3.list_objects(
     ignore_suffix=IGNORE_SUFFIX,
     ignore_empty=True
 )
-filtered_paths = [path for path in paths if "report-coverage-2406" in path]
+filtered_paths = [path for path in paths if "report-coverage-2410" in path]
 
 out_sdf = process_json_to_dataframe("reconf", filtered_paths)
 out_sdf.printSchema()
@@ -163,7 +163,7 @@ except KeyError:
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2406",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2410",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
diff --git a/csit.infra.etl/coverage_soak_rls2406.py b/csit.infra.etl/coverage_soak_rls2410.py
index 6f06f2c075..5dc9fb7ed0 100644
--- a/csit.infra.etl/coverage_soak_rls2406.py
+++ b/csit.infra.etl/coverage_soak_rls2410.py
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-coverage-2406" in path]
+filtered_paths = [path for path in paths if "report-coverage-2410" in path]
 
 out_sdf = process_json_to_dataframe("soak", filtered_paths)
 out_sdf.printSchema()
@@ -163,7 +163,7 @@ except KeyError:
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2406",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2410",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
diff --git a/csit.infra.etl/iterative_hoststack_rls2406.py b/csit.infra.etl/iterative_hoststack_rls2410.py
index 7767cefa78..6d9e3d633f 100644
--- a/csit.infra.etl/iterative_hoststack_rls2406.py
+++ b/csit.infra.etl/iterative_hoststack_rls2410.py
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-iterative-2406" in path]
+filtered_paths = [path for path in paths if "report-iterative-2410" in path]
 
 out_sdf = process_json_to_dataframe("hoststack", filtered_paths)
 out_sdf.printSchema()
@@ -163,7 +163,7 @@ except KeyError:
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2406",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2410",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
diff --git a/csit.infra.etl/iterative_mrr_rls2406.py b/csit.infra.etl/iterative_mrr_rls2410.py
index a1a8d96e26..d74e6a90f3 100644
--- a/csit.infra.etl/iterative_mrr_rls2406.py
+++ b/csit.infra.etl/iterative_mrr_rls2410.py
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-iterative-2406" in path]
+filtered_paths = [path for path in paths if "report-iterative-2410" in path]
 
 out_sdf = process_json_to_dataframe("mrr", filtered_paths)
 out_sdf.printSchema()
@@ -163,7 +163,7 @@ except KeyError:
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2406",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2410",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
diff --git a/csit.infra.etl/iterative_ndrpdr_rls2406.py b/csit.infra.etl/iterative_ndrpdr_rls2410.py
index bb474f1d7f..0530dc8ef5 100644
--- a/csit.infra.etl/iterative_ndrpdr_rls2406.py
+++ b/csit.infra.etl/iterative_ndrpdr_rls2410.py
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-iterative-2406" in path]
+filtered_paths = [path for path in paths if "report-iterative-2410" in path]
 
 out_sdf = process_json_to_dataframe("ndrpdr", filtered_paths)
 out_sdf.printSchema()
@@ -163,7 +163,7 @@ except KeyError:
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2406",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2410",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
diff --git a/csit.infra.etl/iterative_reconf_rls2406.py b/csit.infra.etl/iterative_reconf_rls2410.py
index 71e92b49f8..41d2aedcc1 100644
--- a/csit.infra.etl/iterative_reconf_rls2406.py
+++ b/csit.infra.etl/iterative_reconf_rls2410.py
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-iterative-2406" in path]
+filtered_paths = [path for path in paths if "report-iterative-2410" in path]
 
 out_sdf = process_json_to_dataframe("reconf", filtered_paths)
 out_sdf.show(truncate=False)
@@ -164,7 +164,7 @@ except KeyError:
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2406",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2410",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
diff --git a/csit.infra.etl/iterative_soak_rls2406.py b/csit.infra.etl/iterative_soak_rls2410.py
index 966451539e..f581c7c1e9 100644
--- a/csit.infra.etl/iterative_soak_rls2406.py
+++ b/csit.infra.etl/iterative_soak_rls2410.py
@@ -141,7 +141,7 @@ paths = wr.s3.list_objects(
     ignore_empty=True
 )
 
-filtered_paths = [path for path in paths if "report-iterative-2406" in path]
+filtered_paths = [path for path in paths if "report-iterative-2410" in path]
 
 out_sdf = process_json_to_dataframe("soak", filtered_paths)
 out_sdf.printSchema()
@@ -163,7 +163,7 @@ except KeyError:
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
-        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2406",
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2410",
         dataset=True,
         partition_cols=["test_type", "year", "month", "day"],
         compression="snappy",
diff --git a/fdio.infra.terraform/terraform-nomad-nomad-job/main.tf b/fdio.infra.terraform/terraform-nomad-nomad-job/main.tf
new file mode 100644
index 0000000000..fc604fec7d
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-nomad-job/main.tf
@@ -0,0 +1,33 @@
+locals {
+  datacenters = join(",", var.datacenters)
+  envs        = join("\n", concat([], var.envs))
+}
+
+resource "nomad_job" "nomad_job" {
+  jobspec = templatefile(
+    "${path.cwd}/conf/nomad/${var.job_name}.hcl.tftpl",
+    {
+      aws_access_key_id         = var.aws_access_key_id,
+      aws_secret_access_key     = var.aws_secret_access_key,
+      aws_default_region        = var.aws_default_region
+      cpu                       = var.cpu,
+      cron                      = var.cron,
+      datacenters               = local.datacenters,
+      envs                      = local.envs,
+      image                     = var.image,
+      job_name                  = var.job_name,
+      memory                    = var.memory,
+      out_aws_access_key_id     = var.out_aws_access_key_id,
+      out_aws_secret_access_key = var.out_aws_secret_access_key,
+      out_aws_default_region    = var.out_aws_default_region
+      prohibit_overlap          = var.prohibit_overlap,
+      time_zone                 = var.time_zone,
+      type                      = var.type,
+      use_vault_provider        = var.vault_secret.use_vault_provider,
+      vault_kv_policy_name      = var.vault_secret.vault_kv_policy_name,
+      vault_kv_path             = var.vault_secret.vault_kv_path,
+      vault_kv_field_access_key = var.vault_secret.vault_kv_field_access_key,
+      vault_kv_field_secret_key = var.vault_secret.vault_kv_field_secret_key
+  })
+  detach = false
+}
diff --git a/fdio.infra.terraform/terraform-nomad-nomad-job/variables.tf b/fdio.infra.terraform/terraform-nomad-nomad-job/variables.tf
new file mode 100644
index 0000000000..86d1b45753
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-nomad-job/variables.tf
@@ -0,0 +1,115 @@
+# Nomad
+variable "datacenters" {
+  description = "Specifies the list of DCs to be considered placing this task."
+  type        = list(string)
+  default     = ["dc1"]
+}
+
+# ETL
+variable "aws_access_key_id" {
+  description = "AWS access key."
+  type        = string
+  default     = "aws"
+}
+
+variable "aws_secret_access_key" {
+  description = "AWS secret key"
+  type        = string
+  default     = "aws"
+}
+
+variable "aws_default_region" {
+  description = "AWS region"
+  type        = string
+  default     = "aws"
+}
+
+variable "cpu" {
+  description = "Specifies the CPU required to run this task in MHz."
+  type        = number
+  default     = 10000
+}
+
+variable "cron" {
+  description = "Specifies a cron expression configuring the interval to launch."
+  type        = string
+  default     = "@daily"
+}
+
+variable "envs" {
+  description = "Specifies ETL environment variables."
+  type        = list(string)
+  default     = []
+}
+
+variable "image" {
+  description = "Specifies the Docker image to run."
+  type        = string
+  default     = "pmikus/docker-ubuntu-focal-aws-glue:latest"
+}
+
+variable "job_name" {
+  description = "Specifies a name for the job."
+  type        = string
+  default     = "etl"
+}
+
+variable "memory" {
+  description = "Specifies the memory required in MB."
+  type        = number
+  default     = 50000
+}
+
+variable "out_aws_access_key_id" {
+  description = "AWS access key."
+  type        = string
+  default     = "aws"
+}
+
+variable "out_aws_secret_access_key" {
+  description = "AWS secret key"
+  type        = string
+  default     = "aws"
+}
+
+variable "out_aws_default_region" {
+  description = "AWS region"
+  type        = string
+  default     = "aws"
+}
+
+variable "prohibit_overlap" {
+  description = "Specifies if this job should wait until previous completed."
+  type        = bool
+  default     = true
+}
+
+variable "time_zone" {
+  description = "Specifies the time zone to evaluate the next launch interval."
+  type        = string
+  default     = "UTC"
+}
+
+variable "type" {
+  description = "Specifies the Nomad scheduler to use."
+  type        = string
+  default     = "batch"
+}
+
+variable "vault_secret" {
+  type = object({
+    use_vault_provider        = bool,
+    vault_kv_policy_name      = string,
+    vault_kv_path             = string,
+    vault_kv_field_access_key = string,
+    vault_kv_field_secret_key = string
+  })
+  description = "Set of properties to be able to fetch secret from vault."
+  default = {
+    use_vault_provider        = true
+    vault_kv_policy_name      = "kv"
+    vault_kv_path             = "data/etl"
+    vault_kv_field_access_key = "access_key"
+    vault_kv_field_secret_key = "secret_key"
+  }
+}
diff --git a/fdio.infra.terraform/terraform-nomad-nomad-job/versions.tf b/fdio.infra.terraform/terraform-nomad-nomad-job/versions.tf
new file mode 100644
index 0000000000..f40435fe77
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-nomad-job/versions.tf
@@ -0,0 +1,9 @@
+terraform {
+  required_providers {
+    nomad = {
+      source  = "hashicorp/nomad"
+      version = ">= 1.4.20"
+    }
+  }
+  required_version = ">= 1.5.4"
+}
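For reference, a minimal caller of the new terraform-nomad-nomad-job module might look as follows. This is a hypothetical sketch; the actual caller, shown later in this diff, drives the module with for_each and feeds credentials from Vault rather than hard-coding anything:

  module "etl_example" {
    providers = {
      nomad = nomad.yul1
    }
    source = "../terraform-nomad-nomad-job"

    # job_name selects the template conf/nomad/etl-stats.hcl.tftpl
    datacenters = ["yul1"]
    job_name    = "etl-stats"
    memory      = 50000
    # AWS credentials omitted here; the real caller reads them from Vault.
  }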
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-ndrpdr-rls2406.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-device-rls2410.hcl.tftpl
index 383087803c..cba9954501 100644
--- a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-ndrpdr-rls2406.hcl.tftpl
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-device-rls2410.hcl.tftpl
@@ -1,5 +1,5 @@
 job "${job_name}" {
-  datacenters = "${datacenters}"
+  datacenters = ["${datacenters}"]
   type        = "${type}"
   periodic {
     cron             = "${cron}"
@@ -33,7 +33,7 @@ job "${job_name}" {
           "--executor-memory", "20g",
           "--executor-cores", "2",
           "--master", "local[2]",
-          "coverage_ndrpdr_rls2406.py"
+          "coverage_device_rls2410.py"
         ]
         work_dir = "/local/csit/csit.infra.etl"
       }
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-hoststack-rls2410.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-hoststack-rls2410.hcl.tftpl
new file mode 100644
index 0000000000..34518d3c48
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-hoststack-rls2410.hcl.tftpl
@@ -0,0 +1,55 @@
+job "${job_name}" {
+  datacenters = ["${datacenters}"]
+  type        = "${type}"
+  periodic {
+    cron             = "${cron}"
+    prohibit_overlap = "${prohibit_overlap}"
+    time_zone        = "${time_zone}"
+  }
+  group "${job_name}" {
+    restart {
+      mode = "fail"
+    }
+    constraint {
+      attribute = "$${attr.cpu.arch}"
+      operator  = "!="
+      value     = "arm64"
+    }
+    constraint {
+      attribute = "$${node.class}"
+      value     = "builder"
+    }
+    task "${job_name}" {
+      artifact {
+        source      = "git::https://github.com/FDio/csit"
+        destination = "local/csit"
+      }
+      driver = "docker"
+      config {
+        image   = "${image}"
+        command = "gluesparksubmit"
+        args = [
+          "--driver-memory", "20g",
+          "--executor-memory", "20g",
+          "--executor-cores", "2",
+          "--master", "local[2]",
+          "coverage_hoststack_rls2410.py"
+        ]
+        work_dir = "/local/csit/csit.infra.etl"
+      }
+      env {
+        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
+        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
+        AWS_DEFAULT_REGION        = "${aws_default_region}"
+        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
+        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
+        ${ envs }
+      }
+      resources {
+        cpu    = ${cpu}
+        memory = ${memory}
+      }
+    }
+  }
+}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-soak-rls2406.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-mrr-rls2410.hcl.tftpl
index df3944df48..e5defb6ead 100644
--- a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-soak-rls2406.hcl.tftpl
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-mrr-rls2410.hcl.tftpl
@@ -1,5 +1,5 @@
 job "${job_name}" {
-  datacenters = "${datacenters}"
+  datacenters = ["${datacenters}"]
   type        = "${type}"
   periodic {
     cron             = "${cron}"
@@ -33,7 +33,7 @@ job "${job_name}" {
           "--executor-memory", "20g",
           "--executor-cores", "2",
           "--master", "local[2]",
-          "coverage_soak_rls2406.py"
+          "coverage_mrr_rls2410.py"
         ]
         work_dir = "/local/csit/csit.infra.etl"
      }
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-ndrpdr-rls2410.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-ndrpdr-rls2410.hcl.tftpl
new file mode 100644
index 0000000000..49569f2a97
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-ndrpdr-rls2410.hcl.tftpl
@@ -0,0 +1,55 @@
+job "${job_name}" {
+  datacenters = ["${datacenters}"]
+  type        = "${type}"
+  periodic {
+    cron             = "${cron}"
+    prohibit_overlap = "${prohibit_overlap}"
+    time_zone        = "${time_zone}"
+  }
+  group "${job_name}" {
+    restart {
+      mode = "fail"
+    }
+    constraint {
+      attribute = "$${attr.cpu.arch}"
+      operator  = "!="
+      value     = "arm64"
+    }
+    constraint {
+      attribute = "$${node.class}"
+      value     = "builder"
+    }
+    task "${job_name}" {
+      artifact {
+        source      = "git::https://github.com/FDio/csit"
+        destination = "local/csit"
+      }
+      driver = "docker"
+      config {
+        image   = "${image}"
+        command = "gluesparksubmit"
+        args = [
+          "--driver-memory", "20g",
+          "--executor-memory", "20g",
+          "--executor-cores", "2",
+          "--master", "local[2]",
+          "coverage_ndrpdr_rls2410.py"
+        ]
+        work_dir = "/local/csit/csit.infra.etl"
+      }
+      env {
+        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
+        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
+        AWS_DEFAULT_REGION        = "${aws_default_region}"
+        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
+        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
+        ${ envs }
+      }
+      resources {
+        cpu    = ${cpu}
+        memory = ${memory}
+      }
+    }
+  }
+}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-reconf-rls2410.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-reconf-rls2410.hcl.tftpl
new file mode 100644
index 0000000000..ad973cc5c3
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-reconf-rls2410.hcl.tftpl
@@ -0,0 +1,55 @@
+job "${job_name}" {
+  datacenters = ["${datacenters}"]
+  type        = "${type}"
+  periodic {
+    cron             = "${cron}"
+    prohibit_overlap = "${prohibit_overlap}"
+    time_zone        = "${time_zone}"
+  }
+  group "${job_name}" {
+    restart {
+      mode = "fail"
+    }
+    constraint {
+      attribute = "$${attr.cpu.arch}"
+      operator  = "!="
+      value     = "arm64"
+    }
+    constraint {
+      attribute = "$${node.class}"
+      value     = "builder"
+    }
+    task "${job_name}" {
+      artifact {
+        source      = "git::https://github.com/FDio/csit"
+        destination = "local/csit"
+      }
+      driver = "docker"
+      config {
+        image   = "${image}"
+        command = "gluesparksubmit"
+        args = [
+          "--driver-memory", "20g",
+          "--executor-memory", "20g",
+          "--executor-cores", "2",
+          "--master", "local[2]",
+          "coverage_reconf_rls2410.py"
+        ]
+        work_dir = "/local/csit/csit.infra.etl"
+      }
+      env {
+        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
+        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
+        AWS_DEFAULT_REGION        = "${aws_default_region}"
+        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
+        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
+        ${ envs }
+      }
+      resources {
+        cpu    = ${cpu}
+        memory = ${memory}
+      }
+    }
+  }
+}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-device-rls2406.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-soak-rls2410.hcl.tftpl
index 386dc125cd..f1426328da 100644
--- a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-device-rls2406.hcl.tftpl
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-soak-rls2410.hcl.tftpl
@@ -1,5 +1,5 @@
 job "${job_name}" {
-  datacenters = "${datacenters}"
+  datacenters = ["${datacenters}"]
   type        = "${type}"
   periodic {
     cron             = "${cron}"
@@ -33,7 +33,7 @@ job "${job_name}" {
           "--executor-memory", "20g",
           "--executor-cores", "2",
           "--master", "local[2]",
-          "coverage_device_rls2406.py"
+          "coverage_soak_rls2410.py"
         ]
         work_dir = "/local/csit/csit.infra.etl"
       }
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-hoststack-rls2406.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-hoststack-rls2406.hcl.tftpl
deleted file mode 100644
index 534fa04362..0000000000
--- a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-hoststack-rls2406.hcl.tftpl
+++ /dev/null
@@ -1,55 +0,0 @@
-job "${job_name}" {
-  datacenters = "${datacenters}"
-  type        = "${type}"
-  periodic {
-    cron             = "${cron}"
-    prohibit_overlap = "${prohibit_overlap}"
-    time_zone        = "${time_zone}"
-  }
-  group "${job_name}" {
-    restart {
-      mode = "fail"
-    }
-    constraint {
-      attribute = "$${attr.cpu.arch}"
-      operator  = "!="
-      value     = "arm64"
-    }
-    constraint {
-      attribute = "$${node.class}"
-      value     = "builder"
-    }
-    task "${job_name}" {
-      artifact {
-        source      = "git::https://github.com/FDio/csit"
-        destination = "local/csit"
-      }
-      driver = "docker"
-      config {
-        image   = "${image}"
-        command = "gluesparksubmit"
-        args = [
-          "--driver-memory", "20g",
-          "--executor-memory", "20g",
-          "--executor-cores", "2",
-          "--master", "local[2]",
-          "iterative_hoststack_rls2406.py"
-        ]
-        work_dir = "/local/csit/csit.infra.etl"
-      }
-      env {
-        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
-        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
-        AWS_DEFAULT_REGION        = "${aws_default_region}"
-        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
-        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
-        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
-        ${ envs }
-      }
-      resources {
-        cpu    = ${cpu}
-        memory = ${memory}
-      }
-    }
-  }
-}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-hoststack-rls2410.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-hoststack-rls2410.hcl.tftpl
new file mode 100644
index 0000000000..4d7217ba51
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-hoststack-rls2410.hcl.tftpl
@@ -0,0 +1,55 @@
+job "${job_name}" {
+  datacenters = ["${datacenters}"]
+  type        = "${type}"
+  periodic {
+    cron             = "${cron}"
+    prohibit_overlap = "${prohibit_overlap}"
+    time_zone        = "${time_zone}"
+  }
+  group "${job_name}" {
+    restart {
+      mode = "fail"
+    }
+    constraint {
+      attribute = "$${attr.cpu.arch}"
+      operator  = "!="
+      value     = "arm64"
+    }
+    constraint {
+      attribute = "$${node.class}"
+      value     = "builder"
+    }
+    task "${job_name}" {
+      artifact {
+        source      = "git::https://github.com/FDio/csit"
+        destination = "local/csit"
+      }
+      driver = "docker"
+      config {
+        image   = "${image}"
+        command = "gluesparksubmit"
+        args = [
+          "--driver-memory", "20g",
+          "--executor-memory", "20g",
+          "--executor-cores", "2",
+          "--master", "local[2]",
+          "iterative_hoststack_rls2410.py"
+        ]
+        work_dir = "/local/csit/csit.infra.etl"
+      }
+      env {
+        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
+        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
+        AWS_DEFAULT_REGION        = "${aws_default_region}"
+        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
+        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
+        ${ envs }
+      }
+      resources {
+        cpu    = ${cpu}
+        memory = ${memory}
+      }
+    }
+  }
+}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-mrr-rls2406.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-mrr-rls2406.hcl.tftpl
deleted file mode 100644
index a1992bb651..0000000000
--- a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-mrr-rls2406.hcl.tftpl
+++ /dev/null
@@ -1,55 +0,0 @@
-job "${job_name}" {
-  datacenters = "${datacenters}"
-  type        = "${type}"
-  periodic {
-    cron             = "${cron}"
-    prohibit_overlap = "${prohibit_overlap}"
-    time_zone        = "${time_zone}"
-  }
-  group "${job_name}" {
-    restart {
-      mode = "fail"
-    }
-    constraint {
-      attribute = "$${attr.cpu.arch}"
-      operator  = "!="
-      value     = "arm64"
-    }
-    constraint {
-      attribute = "$${node.class}"
-      value     = "builder"
-    }
-    task "${job_name}" {
-      artifact {
-        source      = "git::https://github.com/FDio/csit"
-        destination = "local/csit"
-      }
-      driver = "docker"
-      config {
-        image   = "${image}"
-        command = "gluesparksubmit"
-        args = [
-          "--driver-memory", "20g",
-          "--executor-memory", "20g",
-          "--executor-cores", "2",
-          "--master", "local[2]",
-          "iterative_mrr_rls2406.py"
-        ]
-        work_dir = "/local/csit/csit.infra.etl"
-      }
-      env {
-        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
-        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
-        AWS_DEFAULT_REGION        = "${aws_default_region}"
-        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
-        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
-        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
-        ${ envs }
-      }
-      resources {
-        cpu    = ${cpu}
-        memory = ${memory}
-      }
-    }
-  }
-}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-reconf-rls2406.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-mrr-rls2410.hcl.tftpl
index f03beb4840..ed0d1c0577 100644
--- a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-reconf-rls2406.hcl.tftpl
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-mrr-rls2410.hcl.tftpl
@@ -1,5 +1,5 @@
 job "${job_name}" {
-  datacenters = "${datacenters}"
+  datacenters = ["${datacenters}"]
   type        = "${type}"
   periodic {
     cron             = "${cron}"
@@ -33,7 +33,7 @@ job "${job_name}" {
           "--executor-memory", "20g",
           "--executor-cores", "2",
           "--master", "local[2]",
-          "coverage_reconf_rls2406.py"
+          "iterative_mrr_rls2410.py"
         ]
         work_dir = "/local/csit/csit.infra.etl"
       }
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-ndrpdr-rls2406.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-ndrpdr-rls2406.hcl.tftpl
deleted file mode 100644
index 461dad8893..0000000000
--- a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-ndrpdr-rls2406.hcl.tftpl
+++ /dev/null
@@ -1,55 +0,0 @@
-job "${job_name}" {
-  datacenters = "${datacenters}"
-  type        = "${type}"
-  periodic {
-    cron             = "${cron}"
-    prohibit_overlap = "${prohibit_overlap}"
-    time_zone        = "${time_zone}"
-  }
-  group "${job_name}" {
-    restart {
-      mode = "fail"
-    }
-    constraint {
-      attribute = "$${attr.cpu.arch}"
-      operator  = "!="
-      value     = "arm64"
-    }
-    constraint {
-      attribute = "$${node.class}"
-      value     = "builder"
-    }
-    task "${job_name}" {
-      artifact {
-        source      = "git::https://github.com/FDio/csit"
-        destination = "local/csit"
-      }
-      driver = "docker"
-      config {
-        image   = "${image}"
-        command = "gluesparksubmit"
-        args = [
-          "--driver-memory", "20g",
-          "--executor-memory", "20g",
-          "--executor-cores", "2",
-          "--master", "local[2]",
-          "iterative_ndrpdr_rls2406.py"
-        ]
-        work_dir = "/local/csit/csit.infra.etl"
-      }
-      env {
-        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
-        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
-        AWS_DEFAULT_REGION        = "${aws_default_region}"
-        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
-        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
-        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
-        ${ envs }
-      }
-      resources {
-        cpu    = ${cpu}
-        memory = ${memory}
-      }
-    }
-  }
-}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-hoststack-rls2406.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-ndrpdr-rls2410.hcl.tftpl
index babde4f734..5df38a2410 100644
--- a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-hoststack-rls2406.hcl.tftpl
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-ndrpdr-rls2410.hcl.tftpl
@@ -1,5 +1,5 @@
 job "${job_name}" {
-  datacenters = "${datacenters}"
+  datacenters = ["${datacenters}"]
   type        = "${type}"
   periodic {
     cron             = "${cron}"
@@ -33,7 +33,7 @@ job "${job_name}" {
           "--executor-memory", "20g",
           "--executor-cores", "2",
           "--master", "local[2]",
-          "coverage_hoststack_rls2406.py"
+          "iterative_ndrpdr_rls2410.py"
         ]
         work_dir = "/local/csit/csit.infra.etl"
       }
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-reconf-rls2406.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-reconf-rls2406.hcl.tftpl
deleted file mode 100644
index d7f5daa478..0000000000
--- a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-reconf-rls2406.hcl.tftpl
+++ /dev/null
@@ -1,55 +0,0 @@
-job "${job_name}" {
-  datacenters = "${datacenters}"
-  type        = "${type}"
-  periodic {
-    cron             = "${cron}"
-    prohibit_overlap = "${prohibit_overlap}"
-    time_zone        = "${time_zone}"
-  }
-  group "${job_name}" {
-    restart {
-      mode = "fail"
-    }
-    constraint {
-      attribute = "$${attr.cpu.arch}"
-      operator  = "!="
-      value     = "arm64"
-    }
-    constraint {
-      attribute = "$${node.class}"
-      value     = "builder"
-    }
-    task "${job_name}" {
-      artifact {
-        source      = "git::https://github.com/FDio/csit"
-        destination = "local/csit"
-      }
-      driver = "docker"
-      config {
-        image   = "${image}"
-        command = "gluesparksubmit"
-        args = [
-          "--driver-memory", "20g",
-          "--executor-memory", "20g",
-          "--executor-cores", "2",
-          "--master", "local[2]",
-          "iterative_reconf_rls2406.py"
-        ]
-        work_dir = "/local/csit/csit.infra.etl"
-      }
-      env {
-        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
-        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
-        AWS_DEFAULT_REGION        = "${aws_default_region}"
-        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
-        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
-        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
-        ${ envs }
-      }
-      resources {
-        cpu    = ${cpu}
-        memory = ${memory}
-      }
-    }
-  }
-}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-reconf-rls2410.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-reconf-rls2410.hcl.tftpl
new file mode 100644
index 0000000000..4337ef1f20
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-reconf-rls2410.hcl.tftpl
@@ -0,0 +1,55 @@
+job "${job_name}" {
+  datacenters = ["${datacenters}"]
+  type        = "${type}"
+  periodic {
+    cron             = "${cron}"
+    prohibit_overlap = "${prohibit_overlap}"
+    time_zone        = "${time_zone}"
+  }
+  group "${job_name}" {
+    restart {
+      mode = "fail"
+    }
+    constraint {
+      attribute = "$${attr.cpu.arch}"
+      operator  = "!="
+      value     = "arm64"
+    }
+    constraint {
+      attribute = "$${node.class}"
+      value     = "builder"
+    }
+    task "${job_name}" {
+      artifact {
+        source      = "git::https://github.com/FDio/csit"
+        destination = "local/csit"
+      }
+      driver = "docker"
+      config {
+        image   = "${image}"
+        command = "gluesparksubmit"
+        args = [
+          "--driver-memory", "20g",
+          "--executor-memory", "20g",
+          "--executor-cores", "2",
+          "--master", "local[2]",
+          "iterative_reconf_rls2410.py"
+        ]
+        work_dir = "/local/csit/csit.infra.etl"
+      }
+      env {
+        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
+        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
+        AWS_DEFAULT_REGION        = "${aws_default_region}"
+        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
+        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
+        ${ envs }
+      }
+      resources {
+        cpu    = ${cpu}
+        memory = ${memory}
+      }
+    }
+  }
+}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-soak-rls2406.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-soak-rls2406.hcl.tftpl
deleted file mode 100644
index 3bf8505925..0000000000
--- a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-soak-rls2406.hcl.tftpl
+++ /dev/null
@@ -1,55 +0,0 @@
-job "${job_name}" {
-  datacenters = "${datacenters}"
-  type        = "${type}"
-  periodic {
-    cron             = "${cron}"
-    prohibit_overlap = "${prohibit_overlap}"
-    time_zone        = "${time_zone}"
-  }
-  group "${job_name}" {
-    restart {
-      mode = "fail"
-    }
-    constraint {
-      attribute = "$${attr.cpu.arch}"
-      operator  = "!="
-      value     = "arm64"
-    }
-    constraint {
-      attribute = "$${node.class}"
-      value     = "builder"
-    }
-    task "${job_name}" {
-      artifact {
-        source      = "git::https://github.com/FDio/csit"
-        destination = "local/csit"
-      }
-      driver = "docker"
-      config {
-        image   = "${image}"
-        command = "gluesparksubmit"
-        args = [
-          "--driver-memory", "20g",
-          "--executor-memory", "20g",
-          "--executor-cores", "2",
-          "--master", "local[2]",
-          "iterative_soak_rls2406.py"
-        ]
-        work_dir = "/local/csit/csit.infra.etl"
-      }
-      env {
-        AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
-        AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
-        AWS_DEFAULT_REGION        = "${aws_default_region}"
-        OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
-        OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
-        OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
-        ${ envs }
-      }
-      resources {
-        cpu    = ${cpu}
-        memory = ${memory}
-      }
-    }
-  }
-}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-mrr-rls2406.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-soak-rls2410.hcl.tftpl
index c41c7b97a0..fb8db8c633 100644
--- a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-mrr-rls2406.hcl.tftpl
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-soak-rls2410.hcl.tftpl
@@ -1,5 +1,5 @@
 job "${job_name}" {
-  datacenters = "${datacenters}"
+  datacenters = ["${datacenters}"]
   type        = "${type}"
   periodic {
     cron             = "${cron}"
@@ -33,7 +33,7 @@ job "${job_name}" {
           "--executor-memory", "20g",
           "--executor-cores", "2",
           "--master", "local[2]",
-          "coverage_mrr_rls2406.py"
+          "iterative_soak_rls2410.py"
         ]
         work_dir = "/local/csit/csit.infra.etl"
       }
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/main.tf b/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/main.tf
deleted file mode 100644
index 026ab168d9..0000000000
--- a/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/main.tf
+++ /dev/null
@@ -1,292 +0,0 @@
-data "vault_kv_secret_v2" "fdio_logs" {
-  mount = "kv"
-  name  = "etl/fdio_logs"
-}
-
-data "vault_kv_secret_v2" "fdio_docs" {
-  mount = "kv"
-  name  = "etl/fdio_docs"
-}
-
-#data "vault_kv_secret_v2" "fdio_logs" {
-#  path = "kv/data/etl/fdio_logs"
-#}
-#
-#data "vault_kv_secret_v2" "fdio_docs" {
-#  path = "kv/data/etl/fdio_docs"
-#}
-
-module "etl-stats" {
-  providers = {
-    nomad = nomad.yul1
-  }
-  source = "../"
-
-  aws_access_key_id         = data.vault_kv_secret_v2.fdio_logs.data.access_key
-  aws_secret_access_key     = data.vault_kv_secret_v2.fdio_logs.data.secret_key
-  aws_default_region        = data.vault_kv_secret_v2.fdio_logs.data.region
-  out_aws_access_key_id     = data.vault_kv_secret_v2.fdio_docs.data.access_key
-  out_aws_secret_access_key = data.vault_kv_secret_v2.fdio_docs.data.secret_key
-  out_aws_default_region    = data.vault_kv_secret_v2.fdio_docs.data.region
-  cron                      = "0 30 0 * * * *"
-  datacenters               = ["yul1"]
-  job_name                  = "etl-stats"
-}
-
-module "etl-trending-hoststack" {
-  providers = {
-    nomad = nomad.yul1
-  }
-  source = "../"
-
-  aws_access_key_id         = data.vault_kv_secret_v2.fdio_logs.data.access_key
-  aws_secret_access_key     = data.vault_kv_secret_v2.fdio_logs.data.secret_key
-  aws_default_region        = data.vault_kv_secret_v2.fdio_logs.data.region
-  out_aws_access_key_id     = data.vault_kv_secret_v2.fdio_docs.data.access_key
-  out_aws_secret_access_key = data.vault_kv_secret_v2.fdio_docs.data.secret_key
-  out_aws_default_region    = data.vault_kv_secret_v2.fdio_docs.data.region
-  cron                      = "0 30 0 * * * *"
-  datacenters               = ["yul1"]
-  job_name                  = "etl-trending-hoststack"
-}
-
-module "etl-trending-mrr" {
-  providers = {
-    nomad = nomad.yul1
-  }
-  source = "../"
-
-  aws_access_key_id         = data.vault_kv_secret_v2.fdio_logs.data.access_key
-  aws_secret_access_key     = data.vault_kv_secret_v2.fdio_logs.data.secret_key
-  aws_default_region        = data.vault_kv_secret_v2.fdio_logs.data.region
-  out_aws_access_key_id     = data.vault_kv_secret_v2.fdio_docs.data.access_key
-  out_aws_secret_access_key = data.vault_kv_secret_v2.fdio_docs.data.secret_key
-  out_aws_default_region    = data.vault_kv_secret_v2.fdio_docs.data.region
-  cron                      = "0 30 0 * * * *"
-  datacenters               = ["yul1"]
-  job_name                  = "etl-trending-mrr"
-  memory                    = 60000
-}
-
-module "etl-trending-ndrpdr" {
-  providers = {
-    nomad = nomad.yul1
-  }
-  source = "../"
-
-  aws_access_key_id         = data.vault_kv_secret_v2.fdio_logs.data.access_key
-  aws_secret_access_key     = data.vault_kv_secret_v2.fdio_logs.data.secret_key
-  aws_default_region        = data.vault_kv_secret_v2.fdio_logs.data.region
-  out_aws_access_key_id     = data.vault_kv_secret_v2.fdio_docs.data.access_key
-  out_aws_secret_access_key = data.vault_kv_secret_v2.fdio_docs.data.secret_key
-  out_aws_default_region    = data.vault_kv_secret_v2.fdio_docs.data.region
-  cron                      = "0 30 0 * * * *"
-  datacenters               = ["yul1"]
-  job_name                  = "etl-trending-ndrpdr"
-  memory                    = 60000
-}
-
-module "etl-trending-soak" {
-  providers = {
-    nomad = nomad.yul1
-  }
-  source = "../"
-
-  aws_access_key_id         = data.vault_kv_secret_v2.fdio_logs.data.access_key
-  aws_secret_access_key     = data.vault_kv_secret_v2.fdio_logs.data.secret_key
-  aws_default_region        = data.vault_kv_secret_v2.fdio_logs.data.region
-  out_aws_access_key_id     = data.vault_kv_secret_v2.fdio_docs.data.access_key
-  out_aws_secret_access_key = data.vault_kv_secret_v2.fdio_docs.data.secret_key
-  out_aws_default_region    = data.vault_kv_secret_v2.fdio_docs.data.region
-  cron                      = "0 30 0 * * * *"
-  datacenters               = ["yul1"]
-  job_name                  = "etl-trending-soak"
-  memory                    = 60000
-}
-
-#module "etl-iterative-hoststack-rls2406" {
-#  providers = {
-#    nomad = nomad.yul1
-#  }
-#  source = "../"
-#
-#  aws_access_key_id         = data.vault_kv_secret_v2.fdio_logs.data["access_key"]
-#  aws_secret_access_key     = data.vault_kv_secret_v2.fdio_logs.data["secret_key"]
-#  aws_default_region        = data.vault_kv_secret_v2.fdio_logs.data["region"]
-#  out_aws_access_key_id     = data.vault_kv_secret_v2.fdio_docs.data["access_key"]
-#  out_aws_secret_access_key = data.vault_kv_secret_v2.fdio_docs.data["secret_key"]
-#  out_aws_default_region    = data.vault_kv_secret_v2.fdio_docs.data["region"]
-#  cron                      = "0 30 0 * * * *"
-#  datacenters               = ["yul1"]
-#  job_name                  = "etl-iterative-hoststack-rls2406"
-#}
-#
-#module "etl-iterative-mrr-rls2406" {
-#  providers = {
-#    nomad = nomad.yul1
-#  }
-#  source = "../"
-#
-#  aws_access_key_id         = data.vault_kv_secret_v2.fdio_logs.data["access_key"]
-#  aws_secret_access_key     = data.vault_kv_secret_v2.fdio_logs.data["secret_key"]
-#  aws_default_region        = data.vault_kv_secret_v2.fdio_logs.data["region"]
-#  out_aws_access_key_id     = data.vault_kv_secret_v2.fdio_docs.data["access_key"]
-#  out_aws_secret_access_key = data.vault_kv_secret_v2.fdio_docs.data["secret_key"]
-#  out_aws_default_region    = data.vault_kv_secret_v2.fdio_docs.data["region"]
-#  cron                      = "0 30 0 * * * *"
-#  datacenters               = ["yul1"]
-#  job_name                  = "etl-iterative-mrr-rls2406"
-#}
-#
-#module "etl-iterative-ndrpdr-rls2406" {
-#  providers = {
-#    nomad = nomad.yul1
-#  }
-#  source = "../"
-#
-#  aws_access_key_id         = data.vault_kv_secret_v2.fdio_logs.data["access_key"]
-#  aws_secret_access_key     = data.vault_kv_secret_v2.fdio_logs.data["secret_key"]
-#  aws_default_region        = data.vault_kv_secret_v2.fdio_logs.data["region"]
-#  out_aws_access_key_id     = data.vault_kv_secret_v2.fdio_docs.data["access_key"]
-#  out_aws_secret_access_key = data.vault_kv_secret_v2.fdio_docs.data["secret_key"]
-#  out_aws_default_region    = data.vault_kv_secret_v2.fdio_docs.data["region"]
-#  cron                      = "0 30 0 * * * *"
-#  datacenters               = ["yul1"]
-#  job_name                  = "etl-iterative-ndrpdr-rls2406"
-#}
-#
-#module "etl-iterative-reconf-rls2406" {
-#  providers = {
-#    nomad = nomad.yul1
-#  }
-#  source = "../"
-#
-#  aws_access_key_id         = data.vault_kv_secret_v2.fdio_logs.data["access_key"]
-#  aws_secret_access_key     = data.vault_kv_secret_v2.fdio_logs.data["secret_key"]
-#  aws_default_region        = data.vault_kv_secret_v2.fdio_logs.data["region"]
-#  out_aws_access_key_id     = data.vault_kv_secret_v2.fdio_docs.data["access_key"]
-#  out_aws_secret_access_key = data.vault_kv_secret_v2.fdio_docs.data["secret_key"]
-#  out_aws_default_region    = data.vault_kv_secret_v2.fdio_docs.data["region"]
-#  cron                      = "0 30 0 * * * *"
-#  datacenters               = ["yul1"]
-#  job_name                  = "etl-iterative-reconf-rls2406"
-#}
-#
-#module "etl-iterative-soak-rls2406" {
-#  providers = {
-#    nomad = nomad.yul1
-#  }
-#  source = "../"
-#
-#  aws_access_key_id         = data.vault_kv_secret_v2.fdio_logs.data["access_key"]
-#  aws_secret_access_key     = data.vault_kv_secret_v2.fdio_logs.data["secret_key"]
-#  aws_default_region        = data.vault_kv_secret_v2.fdio_logs.data["region"]
-#  out_aws_access_key_id     = data.vault_kv_secret_v2.fdio_docs.data["access_key"]
-#  out_aws_secret_access_key = data.vault_kv_secret_v2.fdio_docs.data["secret_key"]
-#  out_aws_default_region    = data.vault_kv_secret_v2.fdio_docs.data["region"]
-#  cron                      = "0 30 0 * * * *"
-#  datacenters               = ["yul1"]
-#  job_name                  = "etl-iterative-soak-rls2406"
-#}
-#
-#module "etl-coverage-device-rls2406" {
-#  providers = {
-#    nomad = nomad.yul1
-#  }
-#  source = "../"
-#
-#  aws_access_key_id         = data.vault_kv_secret_v2.fdio_logs.data["access_key"]
-#  aws_secret_access_key     = data.vault_kv_secret_v2.fdio_logs.data["secret_key"]
-#  aws_default_region        = data.vault_kv_secret_v2.fdio_logs.data["region"]
-#  out_aws_access_key_id     = data.vault_kv_secret_v2.fdio_docs.data["access_key"]
-#  out_aws_secret_access_key = data.vault_kv_secret_v2.fdio_docs.data["secret_key"]
-#  out_aws_default_region    = data.vault_kv_secret_v2.fdio_docs.data["region"]
-#  cron                      = "0 30 0 * * * *"
-#  datacenters               = ["yul1"]
-#  job_name                  = "etl-coverage-device-rls2406"
-#}
-#
-#module "etl-coverage-hoststack-rls2406" {
-#  providers = {
-#    nomad = nomad.yul1
-#  }
-#  source = "../"
-#
-#  aws_access_key_id         = data.vault_kv_secret_v2.fdio_logs.data["access_key"]
-#  aws_secret_access_key     = data.vault_kv_secret_v2.fdio_logs.data["secret_key"]
-#  aws_default_region        = data.vault_kv_secret_v2.fdio_logs.data["region"]
-#  out_aws_access_key_id     = data.vault_kv_secret_v2.fdio_docs.data["access_key"]
-#  out_aws_secret_access_key = data.vault_kv_secret_v2.fdio_docs.data["secret_key"]
-#  out_aws_default_region    = data.vault_kv_secret_v2.fdio_docs.data["region"]
-#  cron                      = "0 30 0 * * * *"
-#  datacenters               = ["yul1"]
-#  job_name                  = "etl-coverage-hoststack-rls2406"
-#}
-#
-#module "etl-coverage-mrr-rls2406" {
-#  providers = {
-#    nomad = nomad.yul1
-#  }
-#  source = "../"
-#
-#  aws_access_key_id         = data.vault_kv_secret_v2.fdio_logs.data["access_key"]
-#  aws_secret_access_key     = data.vault_kv_secret_v2.fdio_logs.data["secret_key"]
-#  aws_default_region        = data.vault_kv_secret_v2.fdio_logs.data["region"]
-#  out_aws_access_key_id     = data.vault_kv_secret_v2.fdio_docs.data["access_key"]
-#  out_aws_secret_access_key = data.vault_kv_secret_v2.fdio_docs.data["secret_key"]
-#  out_aws_default_region    = data.vault_kv_secret_v2.fdio_docs.data["region"]
-#  cron                      = "0 30 0 * * * *"
-#  datacenters               = ["yul1"]
-#  job_name                  = "etl-coverage-mrr-rls2406"
-#}
-#
-#module "etl-coverage-ndrpdr-rls2406" {
-#  providers = {
-#    nomad = nomad.yul1
-#  }
-#  source = "../"
-#
-#  aws_access_key_id         = data.vault_kv_secret_v2.fdio_logs.data["access_key"]
-#  aws_secret_access_key     = data.vault_kv_secret_v2.fdio_logs.data["secret_key"]
-#  aws_default_region        = data.vault_kv_secret_v2.fdio_logs.data["region"]
-#  out_aws_access_key_id     = data.vault_kv_secret_v2.fdio_docs.data["access_key"]
-#  out_aws_secret_access_key = data.vault_kv_secret_v2.fdio_docs.data["secret_key"]
-#  out_aws_default_region    = data.vault_kv_secret_v2.fdio_docs.data["region"]
-#  cron                      = "0 30 0 * * * *"
-#  datacenters               = ["yul1"]
-#  job_name                  = "etl-coverage-ndrpdr-rls2406"
-#}
-#
-#module "etl-coverage-reconf-rls2406" {
-#  providers = {
-#    nomad = nomad.yul1
-#  }
-#  source = "../"
-#
-#  aws_access_key_id         = data.vault_kv_secret_v2.fdio_logs.data["access_key"]
-#  aws_secret_access_key     = data.vault_kv_secret_v2.fdio_logs.data["secret_key"]
-#  aws_default_region        = data.vault_kv_secret_v2.fdio_logs.data["region"]
-#  out_aws_access_key_id     = data.vault_kv_secret_v2.fdio_docs.data["access_key"]
-#  out_aws_secret_access_key = data.vault_kv_secret_v2.fdio_docs.data["secret_key"]
-#  out_aws_default_region    = data.vault_kv_secret_v2.fdio_docs.data["region"]
-#  cron                      = "0 30 0 * * * *"
-#  datacenters               = ["yul1"]
-#  job_name                  = "etl-coverage-reconf-rls2406"
-#}
-#
-#module "etl-coverage-soak-rls2406" {
-#  providers = {
-#    nomad = nomad.yul1
-#  }
-#  source = "../"
-#
-#  aws_access_key_id         = data.vault_kv_secret_v2.fdio_logs.data["access_key"]
-#  aws_secret_access_key     = data.vault_kv_secret_v2.fdio_logs.data["secret_key"]
-#  aws_default_region        = data.vault_kv_secret_v2.fdio_logs.data["region"]
-#  out_aws_access_key_id     = data.vault_kv_secret_v2.fdio_docs.data["access_key"]
-#  out_aws_secret_access_key = data.vault_kv_secret_v2.fdio_docs.data["secret_key"]
-#  out_aws_default_region    = data.vault_kv_secret_v2.fdio_docs.data["region"]
-#  cron                      = "0 30 0 * * * *"
-#  datacenters               = ["yul1"]
-#  job_name                  = "etl-coverage-soak-rls2406"
-#}
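The per-job module blocks deleted above are replaced by a single for_each module in terraform-nomad-pyspark-etl/main.tf further down in this diff. The comprehension there converts the nomad_jobs list into a map keyed by job name, which is the shape for_each requires; a small stand-alone sketch of that conversion, with two illustrative entries:

  locals {
    nomad_jobs = [
      { job_name = "etl-stats", memory = 50000 },
      { job_name = "etl-trending-mrr", memory = 60000 },
    ]
    # for_each needs a map with stable string keys; key each job by name:
    jobs_by_name = { for job in local.nomad_jobs : job.job_name => job }
    # => { "etl-stats" = { ... }, "etl-trending-mrr" = { ... } }
  }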
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/variables.tf b/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/variables.tf
deleted file mode 100644
index 60298d4c99..0000000000
--- a/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/variables.tf
+++ /dev/null
@@ -1,47 +0,0 @@
-variable "nomad_acl" {
-  description = "Nomad ACLs enabled/disabled."
-  type        = bool
-  default     = false
-}
-
-variable "nomad_provider_address" {
-  description = "FD.io Nomad cluster address."
-  type        = string
-  default     = "http://10.30.51.23:4646"
-}
-
-variable "nomad_provider_ca_file" {
-  description = "A local file path to a PEM-encoded certificate authority."
-  type        = string
-  default     = "/etc/nomad.d/ssl/nomad-ca.pem"
-}
-
-variable "nomad_provider_cert_file" {
-  description = "A local file path to a PEM-encoded certificate."
-  type        = string
-  default     = "/etc/nomad.d/ssl/nomad.pem"
-}
-
-variable "nomad_provider_key_file" {
-  description = "A local file path to a PEM-encoded private key."
-  type        = string
-  default     = "/etc/nomad.d/ssl/nomad-key.pem"
-}
-
-variable "vault_provider_address" {
-  description = "Vault cluster address."
-  type        = string
-  default     = "http://10.30.51.23:8200"
-}
-
-variable "vault_provider_skip_tls_verify" {
-  description = "Verification of the Vault server's TLS certificate."
-  type        = bool
-  default     = false
-}
-
-variable "vault_provider_token" {
-  description = "Vault root token."
-  type        = string
-  sensitive   = true
-}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/versions.tf b/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/versions.tf
deleted file mode 100644
index ffe25bb42e..0000000000
--- a/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/versions.tf
+++ /dev/null
@@ -1,17 +0,0 @@
-terraform {
-  backend "consul" {
-    address = "10.30.51.23:8500"
-    scheme  = "http"
-    path    = "terraform/etl"
-  }
-  required_providers {
-    nomad = {
-      source  = "hashicorp/nomad"
-      version = ">= 2.3.0"
-    }
-    vault = {
-      version = ">= 4.3.0"
-    }
-  }
-  required_version = ">= 1.5.4"
-}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/main.tf b/fdio.infra.terraform/terraform-nomad-pyspark-etl/main.tf
index cd6a9a52ff..57baa24276 100644
--- a/fdio.infra.terraform/terraform-nomad-pyspark-etl/main.tf
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/main.tf
@@ -1,33 +1,28 @@
-locals {
-  datacenters = join(",", var.datacenters)
-  envs        = join("\n", concat([], var.envs))
+data "vault_kv_secret_v2" "fdio_logs" {
+  mount = "kv"
+  name  = "etl/fdio_logs"
 }
 
-resource "nomad_job" "nomad_job" {
-  jobspec = templatefile(
-    "${path.module}/conf/nomad/${var.job_name}.hcl.tftpl",
-    {
-      aws_access_key_id         = var.aws_access_key_id,
-      aws_secret_access_key     = var.aws_secret_access_key,
-      aws_default_region        = var.aws_default_region
-      cpu                       = var.cpu,
-      cron                      = var.cron,
-      datacenters               = local.datacenters,
-      envs                      = local.envs,
-      image                     = var.image,
-      job_name                  = var.job_name,
-      memory                    = var.memory,
-      out_aws_access_key_id     = var.out_aws_access_key_id,
-      out_aws_secret_access_key = var.out_aws_secret_access_key,
-      out_aws_default_region    = var.out_aws_default_region
-      prohibit_overlap          = var.prohibit_overlap,
-      time_zone                 = var.time_zone,
-      type                      = var.type,
-      use_vault_provider        = var.vault_secret.use_vault_provider,
-      vault_kv_policy_name      = var.vault_secret.vault_kv_policy_name,
-      vault_kv_path             = var.vault_secret.vault_kv_path,
-      vault_kv_field_access_key = var.vault_secret.vault_kv_field_access_key,
-      vault_kv_field_secret_key = var.vault_secret.vault_kv_field_secret_key
-    })
-  detach = false
+data "vault_kv_secret_v2" "fdio_docs" {
+  mount = "kv"
+  name  = "etl/fdio_docs"
 }
+
+module "etl" {
+  for_each = { for job in var.nomad_jobs : job.job_name => job }
+  providers = {
+    nomad = nomad.yul1
+  }
+  source = "../terraform-nomad-nomad-job"
+
+  aws_access_key_id         = data.vault_kv_secret_v2.fdio_logs.data.access_key
+  aws_secret_access_key     = data.vault_kv_secret_v2.fdio_logs.data.secret_key
+  aws_default_region        = data.vault_kv_secret_v2.fdio_logs.data.region
+  out_aws_access_key_id     = data.vault_kv_secret_v2.fdio_docs.data.access_key
+  out_aws_secret_access_key = data.vault_kv_secret_v2.fdio_docs.data.secret_key
+  out_aws_default_region    = data.vault_kv_secret_v2.fdio_docs.data.region
+  cron                      = "0 30 0 * * * *"
+  datacenters               = ["yul1"]
+  job_name                  = each.key
+  memory                    = each.value.memory
+}
\ No newline at end of file
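With for_each, every entry in var.nomad_jobs becomes an addressable module instance, e.g. module.etl["etl-stats"] in plan output and state commands, so adding or removing a release job no longer disturbs the other instances. A hypothetical output, not part of this change, that would list the managed job names:

  output "etl_job_names" {
    # keys() over a for_each module reference yields the instance keys.
    value = keys(module.etl)
  }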
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/providers.tf b/fdio.infra.terraform/terraform-nomad-pyspark-etl/providers.tf
index c6617da02b..c6617da02b 100644
--- a/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/providers.tf
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/providers.tf
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/variables.tf b/fdio.infra.terraform/terraform-nomad-pyspark-etl/variables.tf
index 86d1b45753..e8ddeb086c 100644
--- a/fdio.infra.terraform/terraform-nomad-pyspark-etl/variables.tf
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/variables.tf
@@ -1,115 +1,118 @@
-# Nomad
-variable "datacenters" {
-  description = "Specifies the list of DCs to be considered placing this task."
-  type        = list(string)
-  default     = ["dc1"]
-}
-
-# ETL
-variable "aws_access_key_id" {
-  description = "AWS access key."
-  type        = string
-  default     = "aws"
-}
-
-variable "aws_secret_access_key" {
-  description = "AWS secret key"
-  type        = string
-  default     = "aws"
-}
-
-variable "aws_default_region" {
-  description = "AWS region"
-  type        = string
-  default     = "aws"
-}
-
-variable "cpu" {
-  description = "Specifies the CPU required to run this task in MHz."
-  type        = number
-  default     = 10000
-}
-
-variable "cron" {
-  description = "Specifies a cron expression configuring the interval to launch."
-  type        = string
-  default     = "@daily"
-}
-
-variable "envs" {
-  description = "Specifies ETL environment variables."
-  type        = list(string)
-  default     = []
+variable "nomad_acl" {
+  description = "Nomad ACLs enabled/disabled."
+  type        = bool
+  default     = false
 }
 
-variable "image" {
-  description = "Specifies the Docker image to run."
+variable "nomad_provider_address" {
+  description = "FD.io Nomad cluster address."
   type        = string
-  default     = "pmikus/docker-ubuntu-focal-aws-glue:latest"
+  default     = "http://10.30.51.23:4646"
 }
 
-variable "job_name" {
-  description = "Specifies a name for the job."
+variable "nomad_provider_ca_file" {
+  description = "A local file path to a PEM-encoded certificate authority."
   type        = string
-  default     = "etl"
-}
-
-variable "memory" {
-  description = "Specifies the memory required in MB."
-  type        = number
-  default     = 50000
+  default     = "/etc/nomad.d/ssl/nomad-ca.pem"
 }
 
-variable "out_aws_access_key_id" {
-  description = "AWS access key."
+variable "nomad_provider_cert_file" {
+  description = "A local file path to a PEM-encoded certificate."
   type        = string
-  default     = "aws"
+  default     = "/etc/nomad.d/ssl/nomad.pem"
 }
 
-variable "out_aws_secret_access_key" {
-  description = "AWS secret key"
+variable "nomad_provider_key_file" {
+  description = "A local file path to a PEM-encoded private key."
   type        = string
-  default     = "aws"
+  default     = "/etc/nomad.d/ssl/nomad-key.pem"
 }
 
-variable "out_aws_default_region" {
-  description = "AWS region"
+variable "vault_provider_address" {
+  description = "Vault cluster address."
   type        = string
-  default     = "aws"
+  default     = "http://10.30.51.23:8200"
 }
 
-variable "prohibit_overlap" {
-  description = "Specifies if this job should wait until previous completed."
+variable "vault_provider_skip_tls_verify" {
+  description = "Verification of the Vault server's TLS certificate."
   type        = bool
-  default     = true
+  default     = false
 }
 
-variable "time_zone" {
-  description = "Specifies the time zone to evaluate the next launch interval."
+variable "vault_provider_token" {
+  description = "Vault root token."
   type        = string
-  default     = "UTC"
-}
-
-variable "type" {
-  description = "Specifies the Nomad scheduler to use."
-  type        = string
-  default     = "batch"
-}
-
-variable "vault_secret" {
-  type = object({
-    use_vault_provider        = bool,
-    vault_kv_policy_name      = string,
-    vault_kv_path             = string,
-    vault_kv_field_access_key = string,
-    vault_kv_field_secret_key = string
-  })
-  description = "Set of properties to be able to fetch secret from vault."
-  default = {
-    use_vault_provider        = true
-    vault_kv_policy_name      = "kv"
-    vault_kv_path             = "data/etl"
-    vault_kv_field_access_key = "access_key"
-    vault_kv_field_secret_key = "secret_key"
-  }
-}
+  sensitive   = true
+}
+
+variable "nomad_jobs" {
+  description = "List of ETL jobs"
+  type        = list(map(any))
+  default = [
+    {
+      job_name = "etl-stats"
+      memory   = 50000
+    },
+    {
+      job_name = "etl-trending-hoststack"
+      memory   = 50000
+    },
+    {
+      job_name = "etl-iterative-hoststack-rls2410"
+      memory   = 50000
+    },
+    {
+      job_name = "etl-iterative-mrr-rls2410"
+      memory   = 50000
+    },
+    {
+      job_name = "etl-iterative-ndrpdr-rls2410"
+      memory   = 50000
+    },
+    {
+      job_name = "etl-iterative-reconf-rls2410"
+      memory   = 50000
+    },
+    {
+      job_name = "etl-iterative-soak-rls2410"
+      memory   = 50000
+    },
+    {
+      job_name = "etl-coverage-device-rls2410"
+      memory   = 50000
+    },
+    {
+      job_name = "etl-coverage-hoststack-rls2410"
+      memory   = 50000
+    },
+    {
+      job_name = "etl-coverage-mrr-rls2410"
+      memory   = 50000
+    },
+    {
+      job_name = "etl-coverage-ndrpdr-rls2410"
+      memory   = 50000
+    },
+    {
+      job_name = "etl-coverage-reconf-rls2410"
+      memory   = 50000
+    },
+    {
+      job_name = "etl-coverage-soak-rls2410"
+      memory   = 50000
+    },
+    {
+      job_name = "etl-trending-mrr"
+      memory   = 60000
+    },
+    {
+      job_name = "etl-trending-ndrpdr"
+      memory   = 60000
+    },
+    {
+      job_name = "etl-trending-soak"
+      memory   = 60000
+    }
+  ]
+}
\ No newline at end of file
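Onboarding the next release is then a matter of appending entries to the nomad_jobs default above and adding the matching ETL script and job template. A hypothetical future entry (the release tag is invented for illustration):

  {
    # Requires conf/nomad/etl-coverage-device-rls2502.hcl.tftpl to exist.
    job_name = "etl-coverage-device-rls2502"
    memory   = 50000
  },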
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/versions.tf b/fdio.infra.terraform/terraform-nomad-pyspark-etl/versions.tf
index f40435fe77..ffe25bb42e 100644
--- a/fdio.infra.terraform/terraform-nomad-pyspark-etl/versions.tf
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/versions.tf
@@ -1,8 +1,16 @@
 terraform {
+  backend "consul" {
+    address = "10.30.51.23:8500"
+    scheme  = "http"
+    path    = "terraform/etl"
+  }
   required_providers {
     nomad = {
       source  = "hashicorp/nomad"
-      version = ">= 1.4.20"
+      version = ">= 2.3.0"
+    }
+    vault = {
+      version = ">= 4.3.0"
     }
   }
   required_version = ">= 1.5.4"