aboutsummaryrefslogtreecommitdiffstats
path: root/fdio.infra.terraform/terraform-nomad-pyspark-etl
diff options
context:
space:
mode:
Diffstat (limited to 'fdio.infra.terraform/terraform-nomad-pyspark-etl')
-rw-r--r--fdio.infra.terraform/terraform-nomad-pyspark-etl/README.md50
-rw-r--r--fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-device-rls2402.hcl.tftpl55
-rw-r--r--fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-hoststack-rls2402.hcl.tftpl55
-rw-r--r--fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-mrr-rls2402.hcl.tftpl55
-rw-r--r--fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-ndrpdr-rls2402.hcl.tftpl55
-rw-r--r--fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-reconf-rls2402.hcl.tftpl55
-rw-r--r--fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-soak-rls2402.hcl.tftpl55
-rw-r--r--fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-hoststack-rls2402.hcl.tftpl55
-rw-r--r--fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-mrr-rls2402.hcl.tftpl55
-rw-r--r--fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-ndrpdr-rls2402.hcl.tftpl55
-rw-r--r--fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-reconf-rls2402.hcl.tftpl55
-rw-r--r--fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-soak-rls2402.hcl.tftpl55
-rw-r--r--fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-stats.hcl.tftpl53
-rw-r--r--fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-trending-hoststack.hcl.tftpl53
-rw-r--r--fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-trending-mrr.hcl.tftpl53
-rw-r--r--fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-trending-ndrpdr.hcl.tftpl55
-rw-r--r--fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-trending-soak.hcl.tftpl55
-rw-r--r--fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/main.tf283
-rw-r--r--fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/providers.tf13
-rw-r--r--fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/variables.tf47
-rw-r--r--fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/versions.tf17
-rw-r--r--fdio.infra.terraform/terraform-nomad-pyspark-etl/main.tf33
-rw-r--r--fdio.infra.terraform/terraform-nomad-pyspark-etl/variables.tf115
-rw-r--r--fdio.infra.terraform/terraform-nomad-pyspark-etl/versions.tf9
24 files changed, 1441 insertions, 0 deletions
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/README.md b/fdio.infra.terraform/terraform-nomad-pyspark-etl/README.md
new file mode 100644
index 0000000000..d61c8778d4
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/README.md
@@ -0,0 +1,50 @@
+<!-- BEGIN_TF_DOCS -->
+## Requirements
+
+| Name | Version |
+|------|---------|
+| <a name="requirement_terraform"></a> [terraform](#requirement\_terraform) | >= 1.5.4 |
+| <a name="requirement_nomad"></a> [nomad](#requirement\_nomad) | >= 1.4.20 |
+
+## Providers
+
+| Name | Version |
+|------|---------|
+| <a name="provider_nomad"></a> [nomad](#provider\_nomad) | >= 1.4.20 |
+
+## Modules
+
+No modules.
+
+## Resources
+
+| Name | Type |
+|------|------|
+| [nomad_job.nomad_job](https://registry.terraform.io/providers/hashicorp/nomad/latest/docs/resources/job) | resource |
+
+## Inputs
+
+| Name | Description | Type | Default | Required |
+|------|-------------|------|---------|:--------:|
+| <a name="input_aws_access_key_id"></a> [aws\_access\_key\_id](#input\_aws\_access\_key\_id) | AWS access key. | `string` | `"aws"` | no |
+| <a name="input_aws_default_region"></a> [aws\_default\_region](#input\_aws\_default\_region) | AWS region | `string` | `"aws"` | no |
+| <a name="input_aws_secret_access_key"></a> [aws\_secret\_access\_key](#input\_aws\_secret\_access\_key) | AWS secret key | `string` | `"aws"` | no |
+| <a name="input_cpu"></a> [cpu](#input\_cpu) | Specifies the CPU required to run this task in MHz. | `number` | `10000` | no |
+| <a name="input_cron"></a> [cron](#input\_cron) | Specifies a cron expression configuring the interval to launch. | `string` | `"@daily"` | no |
+| <a name="input_datacenters"></a> [datacenters](#input\_datacenters) | Specifies the list of DCs to be considered placing this task. | `list(string)` | <pre>[<br> "dc1"<br>]</pre> | no |
+| <a name="input_envs"></a> [envs](#input\_envs) | Specifies ETL environment variables. | `list(string)` | `[]` | no |
+| <a name="input_image"></a> [image](#input\_image) | Specifies the Docker image to run. | `string` | `"pmikus/docker-ubuntu-focal-aws-glue:latest"` | no |
+| <a name="input_job_name"></a> [job\_name](#input\_job\_name) | Specifies a name for the job. | `string` | `"etl"` | no |
+| <a name="input_memory"></a> [memory](#input\_memory) | Specifies the memory required in MB. | `number` | `50000` | no |
+| <a name="input_out_aws_access_key_id"></a> [out\_aws\_access\_key\_id](#input\_out\_aws\_access\_key\_id) | AWS access key. | `string` | `"aws"` | no |
+| <a name="input_out_aws_default_region"></a> [out\_aws\_default\_region](#input\_out\_aws\_default\_region) | AWS region | `string` | `"aws"` | no |
+| <a name="input_out_aws_secret_access_key"></a> [out\_aws\_secret\_access\_key](#input\_out\_aws\_secret\_access\_key) | AWS secret key | `string` | `"aws"` | no |
+| <a name="input_prohibit_overlap"></a> [prohibit\_overlap](#input\_prohibit\_overlap) | Specifies if this job should wait until previous completed. | `bool` | `true` | no |
+| <a name="input_time_zone"></a> [time\_zone](#input\_time\_zone) | Specifies the time zone to evaluate the next launch interval. | `string` | `"UTC"` | no |
+| <a name="input_type"></a> [type](#input\_type) | Specifies the Nomad scheduler to use. | `string` | `"batch"` | no |
+| <a name="input_vault_secret"></a> [vault\_secret](#input\_vault\_secret) | Set of properties to be able to fetch secret from vault. | <pre>object({<br> use_vault_provider = bool,<br> vault_kv_policy_name = string,<br> vault_kv_path = string,<br> vault_kv_field_access_key = string,<br> vault_kv_field_secret_key = string<br> })</pre> | <pre>{<br> "use_vault_provider": false,<br> "vault_kv_field_access_key": "access_key",<br> "vault_kv_field_secret_key": "secret_key",<br> "vault_kv_path": "secret/data/etl",<br> "vault_kv_policy_name": "kv"<br>}</pre> | no |
+
+## Outputs
+
+No outputs.
+<!-- END_TF_DOCS --> \ No newline at end of file
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-device-rls2402.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-device-rls2402.hcl.tftpl
new file mode 100644
index 0000000000..cc0b1df8b5
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-device-rls2402.hcl.tftpl
@@ -0,0 +1,55 @@
+job "${job_name}" {
+ datacenters = "${datacenters}"
+ type = "${type}"
+ periodic {
+ cron = "${cron}"
+ prohibit_overlap = "${prohibit_overlap}"
+ time_zone = "${time_zone}"
+ }
+ group "${job_name}" {
+ restart {
+ mode = "fail"
+ }
+ constraint {
+ attribute = "$${attr.cpu.arch}"
+ operator = "!="
+ value = "arm64"
+ }
+ constraint {
+ attribute = "$${node.class}"
+ value = "builder"
+ }
+ task "${job_name}" {
+ artifact {
+ source = "git::https://github.com/FDio/csit"
+ destination = "local/csit"
+ }
+ driver = "docker"
+ config {
+ image = "${image}"
+ command = "gluesparksubmit"
+ args = [
+ "--driver-memory", "20g",
+ "--executor-memory", "20g",
+ "--executor-cores", "2",
+ "--master", "local[2]",
+ "coverage_device_rls2402.py"
+ ]
+ work_dir = "/local/csit/csit.infra.etl"
+ }
+ env {
+ AWS_ACCESS_KEY_ID = "${aws_access_key_id}"
+ AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}"
+ AWS_DEFAULT_REGION = "${aws_default_region}"
+ OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}"
+ OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+ OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}"
+ ${ envs }
+ }
+ resources {
+ cpu = ${cpu}
+ memory = ${memory}
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-hoststack-rls2402.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-hoststack-rls2402.hcl.tftpl
new file mode 100644
index 0000000000..95d7a4c46e
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-hoststack-rls2402.hcl.tftpl
@@ -0,0 +1,55 @@
+job "${job_name}" {
+ datacenters = "${datacenters}"
+ type = "${type}"
+ periodic {
+ cron = "${cron}"
+ prohibit_overlap = "${prohibit_overlap}"
+ time_zone = "${time_zone}"
+ }
+ group "${job_name}" {
+ restart {
+ mode = "fail"
+ }
+ constraint {
+ attribute = "$${attr.cpu.arch}"
+ operator = "!="
+ value = "arm64"
+ }
+ constraint {
+ attribute = "$${node.class}"
+ value = "builder"
+ }
+ task "${job_name}" {
+ artifact {
+ source = "git::https://github.com/FDio/csit"
+ destination = "local/csit"
+ }
+ driver = "docker"
+ config {
+ image = "${image}"
+ command = "gluesparksubmit"
+ args = [
+ "--driver-memory", "20g",
+ "--executor-memory", "20g",
+ "--executor-cores", "2",
+ "--master", "local[2]",
+ "coverage_hoststack_rls2402.py"
+ ]
+ work_dir = "/local/csit/csit.infra.etl"
+ }
+ env {
+ AWS_ACCESS_KEY_ID = "${aws_access_key_id}"
+ AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}"
+ AWS_DEFAULT_REGION = "${aws_default_region}"
+ OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}"
+ OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+ OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}"
+ ${ envs }
+ }
+ resources {
+ cpu = ${cpu}
+ memory = ${memory}
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-mrr-rls2402.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-mrr-rls2402.hcl.tftpl
new file mode 100644
index 0000000000..3bab9264fa
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-mrr-rls2402.hcl.tftpl
@@ -0,0 +1,55 @@
+job "${job_name}" {
+ datacenters = "${datacenters}"
+ type = "${type}"
+ periodic {
+ cron = "${cron}"
+ prohibit_overlap = "${prohibit_overlap}"
+ time_zone = "${time_zone}"
+ }
+ group "${job_name}" {
+ restart {
+ mode = "fail"
+ }
+ constraint {
+ attribute = "$${attr.cpu.arch}"
+ operator = "!="
+ value = "arm64"
+ }
+ constraint {
+ attribute = "$${node.class}"
+ value = "builder"
+ }
+ task "${job_name}" {
+ artifact {
+ source = "git::https://github.com/FDio/csit"
+ destination = "local/csit"
+ }
+ driver = "docker"
+ config {
+ image = "${image}"
+ command = "gluesparksubmit"
+ args = [
+ "--driver-memory", "20g",
+ "--executor-memory", "20g",
+ "--executor-cores", "2",
+ "--master", "local[2]",
+ "coverage_mrr_rls2402.py"
+ ]
+ work_dir = "/local/csit/csit.infra.etl"
+ }
+ env {
+ AWS_ACCESS_KEY_ID = "${aws_access_key_id}"
+ AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}"
+ AWS_DEFAULT_REGION = "${aws_default_region}"
+ OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}"
+ OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+ OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}"
+ ${ envs }
+ }
+ resources {
+ cpu = ${cpu}
+ memory = ${memory}
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-ndrpdr-rls2402.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-ndrpdr-rls2402.hcl.tftpl
new file mode 100644
index 0000000000..6142219546
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-ndrpdr-rls2402.hcl.tftpl
@@ -0,0 +1,55 @@
+job "${job_name}" {
+ datacenters = "${datacenters}"
+ type = "${type}"
+ periodic {
+ cron = "${cron}"
+ prohibit_overlap = "${prohibit_overlap}"
+ time_zone = "${time_zone}"
+ }
+ group "${job_name}" {
+ restart {
+ mode = "fail"
+ }
+ constraint {
+ attribute = "$${attr.cpu.arch}"
+ operator = "!="
+ value = "arm64"
+ }
+ constraint {
+ attribute = "$${node.class}"
+ value = "builder"
+ }
+ task "${job_name}" {
+ artifact {
+ source = "git::https://github.com/FDio/csit"
+ destination = "local/csit"
+ }
+ driver = "docker"
+ config {
+ image = "${image}"
+ command = "gluesparksubmit"
+ args = [
+ "--driver-memory", "20g",
+ "--executor-memory", "20g",
+ "--executor-cores", "2",
+ "--master", "local[2]",
+ "coverage_ndrpdr_rls2402.py"
+ ]
+ work_dir = "/local/csit/csit.infra.etl"
+ }
+ env {
+ AWS_ACCESS_KEY_ID = "${aws_access_key_id}"
+ AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}"
+ AWS_DEFAULT_REGION = "${aws_default_region}"
+ OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}"
+ OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+ OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}"
+ ${ envs }
+ }
+ resources {
+ cpu = ${cpu}
+ memory = ${memory}
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-reconf-rls2402.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-reconf-rls2402.hcl.tftpl
new file mode 100644
index 0000000000..b474e75217
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-reconf-rls2402.hcl.tftpl
@@ -0,0 +1,55 @@
+job "${job_name}" {
+ datacenters = "${datacenters}"
+ type = "${type}"
+ periodic {
+ cron = "${cron}"
+ prohibit_overlap = "${prohibit_overlap}"
+ time_zone = "${time_zone}"
+ }
+ group "${job_name}" {
+ restart {
+ mode = "fail"
+ }
+ constraint {
+ attribute = "$${attr.cpu.arch}"
+ operator = "!="
+ value = "arm64"
+ }
+ constraint {
+ attribute = "$${node.class}"
+ value = "builder"
+ }
+ task "${job_name}" {
+ artifact {
+ source = "git::https://github.com/FDio/csit"
+ destination = "local/csit"
+ }
+ driver = "docker"
+ config {
+ image = "${image}"
+ command = "gluesparksubmit"
+ args = [
+ "--driver-memory", "20g",
+ "--executor-memory", "20g",
+ "--executor-cores", "2",
+ "--master", "local[2]",
+ "coverage_reconf_rls2402.py"
+ ]
+ work_dir = "/local/csit/csit.infra.etl"
+ }
+ env {
+ AWS_ACCESS_KEY_ID = "${aws_access_key_id}"
+ AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}"
+ AWS_DEFAULT_REGION = "${aws_default_region}"
+ OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}"
+ OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+ OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}"
+ ${ envs }
+ }
+ resources {
+ cpu = ${cpu}
+ memory = ${memory}
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-soak-rls2402.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-soak-rls2402.hcl.tftpl
new file mode 100644
index 0000000000..0352e1e879
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-soak-rls2402.hcl.tftpl
@@ -0,0 +1,55 @@
+job "${job_name}" {
+ datacenters = "${datacenters}"
+ type = "${type}"
+ periodic {
+ cron = "${cron}"
+ prohibit_overlap = "${prohibit_overlap}"
+ time_zone = "${time_zone}"
+ }
+ group "${job_name}" {
+ restart {
+ mode = "fail"
+ }
+ constraint {
+ attribute = "$${attr.cpu.arch}"
+ operator = "!="
+ value = "arm64"
+ }
+ constraint {
+ attribute = "$${node.class}"
+ value = "builder"
+ }
+ task "${job_name}" {
+ artifact {
+ source = "git::https://github.com/FDio/csit"
+ destination = "local/csit"
+ }
+ driver = "docker"
+ config {
+ image = "${image}"
+ command = "gluesparksubmit"
+ args = [
+ "--driver-memory", "20g",
+ "--executor-memory", "20g",
+ "--executor-cores", "2",
+ "--master", "local[2]",
+ "coverage_soak_rls2402.py"
+ ]
+ work_dir = "/local/csit/csit.infra.etl"
+ }
+ env {
+ AWS_ACCESS_KEY_ID = "${aws_access_key_id}"
+ AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}"
+ AWS_DEFAULT_REGION = "${aws_default_region}"
+ OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}"
+ OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+ OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}"
+ ${ envs }
+ }
+ resources {
+ cpu = ${cpu}
+ memory = ${memory}
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-hoststack-rls2402.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-hoststack-rls2402.hcl.tftpl
new file mode 100644
index 0000000000..74478c59f7
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-hoststack-rls2402.hcl.tftpl
@@ -0,0 +1,55 @@
+job "${job_name}" {
+ datacenters = "${datacenters}"
+ type = "${type}"
+ periodic {
+ cron = "${cron}"
+ prohibit_overlap = "${prohibit_overlap}"
+ time_zone = "${time_zone}"
+ }
+ group "${job_name}" {
+ restart {
+ mode = "fail"
+ }
+ constraint {
+ attribute = "$${attr.cpu.arch}"
+ operator = "!="
+ value = "arm64"
+ }
+ constraint {
+ attribute = "$${node.class}"
+ value = "builder"
+ }
+ task "${job_name}" {
+ artifact {
+ source = "git::https://github.com/FDio/csit"
+ destination = "local/csit"
+ }
+ driver = "docker"
+ config {
+ image = "${image}"
+ command = "gluesparksubmit"
+ args = [
+ "--driver-memory", "20g",
+ "--executor-memory", "20g",
+ "--executor-cores", "2",
+ "--master", "local[2]",
+ "iterative_hoststack_rls2402.py"
+ ]
+ work_dir = "/local/csit/csit.infra.etl"
+ }
+ env {
+ AWS_ACCESS_KEY_ID = "${aws_access_key_id}"
+ AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}"
+ AWS_DEFAULT_REGION = "${aws_default_region}"
+ OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}"
+ OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+ OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}"
+ ${ envs }
+ }
+ resources {
+ cpu = ${cpu}
+ memory = ${memory}
+ }
+ }
+ }
+}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-mrr-rls2402.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-mrr-rls2402.hcl.tftpl
new file mode 100644
index 0000000000..e6bd87b8ed
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-mrr-rls2402.hcl.tftpl
@@ -0,0 +1,55 @@
+job "${job_name}" {
+ datacenters = "${datacenters}"
+ type = "${type}"
+ periodic {
+ cron = "${cron}"
+ prohibit_overlap = "${prohibit_overlap}"
+ time_zone = "${time_zone}"
+ }
+ group "${job_name}" {
+ restart {
+ mode = "fail"
+ }
+ constraint {
+ attribute = "$${attr.cpu.arch}"
+ operator = "!="
+ value = "arm64"
+ }
+ constraint {
+ attribute = "$${node.class}"
+ value = "builder"
+ }
+ task "${job_name}" {
+ artifact {
+ source = "git::https://github.com/FDio/csit"
+ destination = "local/csit"
+ }
+ driver = "docker"
+ config {
+ image = "${image}"
+ command = "gluesparksubmit"
+ args = [
+ "--driver-memory", "20g",
+ "--executor-memory", "20g",
+ "--executor-cores", "2",
+ "--master", "local[2]",
+ "iterative_mrr_rls2402.py"
+ ]
+ work_dir = "/local/csit/csit.infra.etl"
+ }
+ env {
+ AWS_ACCESS_KEY_ID = "${aws_access_key_id}"
+ AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}"
+ AWS_DEFAULT_REGION = "${aws_default_region}"
+ OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}"
+ OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+ OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}"
+ ${ envs }
+ }
+ resources {
+ cpu = ${cpu}
+ memory = ${memory}
+ }
+ }
+ }
+}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-ndrpdr-rls2402.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-ndrpdr-rls2402.hcl.tftpl
new file mode 100644
index 0000000000..4a40321377
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-ndrpdr-rls2402.hcl.tftpl
@@ -0,0 +1,55 @@
+job "${job_name}" {
+ datacenters = "${datacenters}"
+ type = "${type}"
+ periodic {
+ cron = "${cron}"
+ prohibit_overlap = "${prohibit_overlap}"
+ time_zone = "${time_zone}"
+ }
+ group "${job_name}" {
+ restart {
+ mode = "fail"
+ }
+ constraint {
+ attribute = "$${attr.cpu.arch}"
+ operator = "!="
+ value = "arm64"
+ }
+ constraint {
+ attribute = "$${node.class}"
+ value = "builder"
+ }
+ task "${job_name}" {
+ artifact {
+ source = "git::https://github.com/FDio/csit"
+ destination = "local/csit"
+ }
+ driver = "docker"
+ config {
+ image = "${image}"
+ command = "gluesparksubmit"
+ args = [
+ "--driver-memory", "20g",
+ "--executor-memory", "20g",
+ "--executor-cores", "2",
+ "--master", "local[2]",
+ "iterative_ndrpdr_rls2402.py"
+ ]
+ work_dir = "/local/csit/csit.infra.etl"
+ }
+ env {
+ AWS_ACCESS_KEY_ID = "${aws_access_key_id}"
+ AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}"
+ AWS_DEFAULT_REGION = "${aws_default_region}"
+ OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}"
+ OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+ OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}"
+ ${ envs }
+ }
+ resources {
+ cpu = ${cpu}
+ memory = ${memory}
+ }
+ }
+ }
+}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-reconf-rls2402.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-reconf-rls2402.hcl.tftpl
new file mode 100644
index 0000000000..670dd37a11
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-reconf-rls2402.hcl.tftpl
@@ -0,0 +1,55 @@
+job "${job_name}" {
+ datacenters = "${datacenters}"
+ type = "${type}"
+ periodic {
+ cron = "${cron}"
+ prohibit_overlap = "${prohibit_overlap}"
+ time_zone = "${time_zone}"
+ }
+ group "${job_name}" {
+ restart {
+ mode = "fail"
+ }
+ constraint {
+ attribute = "$${attr.cpu.arch}"
+ operator = "!="
+ value = "arm64"
+ }
+ constraint {
+ attribute = "$${node.class}"
+ value = "builder"
+ }
+ task "${job_name}" {
+ artifact {
+ source = "git::https://github.com/FDio/csit"
+ destination = "local/csit"
+ }
+ driver = "docker"
+ config {
+ image = "${image}"
+ command = "gluesparksubmit"
+ args = [
+ "--driver-memory", "20g",
+ "--executor-memory", "20g",
+ "--executor-cores", "2",
+ "--master", "local[2]",
+ "iterative_reconf_rls2402.py"
+ ]
+ work_dir = "/local/csit/csit.infra.etl"
+ }
+ env {
+ AWS_ACCESS_KEY_ID = "${aws_access_key_id}"
+ AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}"
+ AWS_DEFAULT_REGION = "${aws_default_region}"
+ OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}"
+ OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+ OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}"
+ ${ envs }
+ }
+ resources {
+ cpu = ${cpu}
+ memory = ${memory}
+ }
+ }
+ }
+}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-soak-rls2402.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-soak-rls2402.hcl.tftpl
new file mode 100644
index 0000000000..c4ad363879
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-soak-rls2402.hcl.tftpl
@@ -0,0 +1,55 @@
+job "${job_name}" {
+ datacenters = "${datacenters}"
+ type = "${type}"
+ periodic {
+ cron = "${cron}"
+ prohibit_overlap = "${prohibit_overlap}"
+ time_zone = "${time_zone}"
+ }
+ group "${job_name}" {
+ restart {
+ mode = "fail"
+ }
+ constraint {
+ attribute = "$${attr.cpu.arch}"
+ operator = "!="
+ value = "arm64"
+ }
+ constraint {
+ attribute = "$${node.class}"
+ value = "builder"
+ }
+ task "${job_name}" {
+ artifact {
+ source = "git::https://github.com/FDio/csit"
+ destination = "local/csit"
+ }
+ driver = "docker"
+ config {
+ image = "${image}"
+ command = "gluesparksubmit"
+ args = [
+ "--driver-memory", "20g",
+ "--executor-memory", "20g",
+ "--executor-cores", "2",
+ "--master", "local[2]",
+ "iterative_soak_rls2402.py"
+ ]
+ work_dir = "/local/csit/csit.infra.etl"
+ }
+ env {
+ AWS_ACCESS_KEY_ID = "${aws_access_key_id}"
+ AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}"
+ AWS_DEFAULT_REGION = "${aws_default_region}"
+ OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}"
+ OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+ OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}"
+ ${ envs }
+ }
+ resources {
+ cpu = ${cpu}
+ memory = ${memory}
+ }
+ }
+ }
+}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-stats.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-stats.hcl.tftpl
new file mode 100644
index 0000000000..86ca584de7
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-stats.hcl.tftpl
@@ -0,0 +1,53 @@
+job "${job_name}" {
+ datacenters = "${datacenters}"
+ type = "${type}"
+ periodic {
+ cron = "${cron}"
+ prohibit_overlap = "${prohibit_overlap}"
+ time_zone = "${time_zone}"
+ }
+ group "${job_name}" {
+ restart {
+ mode = "fail"
+ }
+ constraint {
+ attribute = "$${attr.cpu.arch}"
+ operator = "!="
+ value = "arm64"
+ }
+ constraint {
+ attribute = "$${node.class}"
+ value = "builder"
+ }
+ task "${job_name}" {
+ artifact {
+ source = "git::https://github.com/FDio/csit"
+ destination = "local/csit"
+ }
+ driver = "docker"
+ config {
+ image = "${image}"
+ command = "gluesparksubmit"
+ args = [
+ "--driver-memory", "10g",
+ "--executor-memory", "10g",
+ "stats.py"
+ ]
+ work_dir = "/local/csit/csit.infra.etl"
+ }
+ env {
+ AWS_ACCESS_KEY_ID = "${aws_access_key_id}"
+ AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}"
+ AWS_DEFAULT_REGION = "${aws_default_region}"
+ OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}"
+ OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+ OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}"
+ ${ envs }
+ }
+ resources {
+ cpu = ${cpu}
+ memory = ${memory}
+ }
+ }
+ }
+}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-trending-hoststack.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-trending-hoststack.hcl.tftpl
new file mode 100644
index 0000000000..24aa4095d2
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-trending-hoststack.hcl.tftpl
@@ -0,0 +1,53 @@
+job "${job_name}" {
+ datacenters = "${datacenters}"
+ type = "${type}"
+ periodic {
+ cron = "${cron}"
+ prohibit_overlap = "${prohibit_overlap}"
+ time_zone = "${time_zone}"
+ }
+ group "${job_name}" {
+ restart {
+ mode = "fail"
+ }
+ constraint {
+ attribute = "$${attr.cpu.arch}"
+ operator = "!="
+ value = "arm64"
+ }
+ constraint {
+ attribute = "$${node.class}"
+ value = "builder"
+ }
+ task "${job_name}" {
+ artifact {
+ source = "git::https://github.com/FDio/csit"
+ destination = "local/csit"
+ }
+ driver = "docker"
+ config {
+ image = "${image}"
+ command = "gluesparksubmit"
+ args = [
+ "--driver-memory", "30g",
+ "--executor-memory", "30g",
+ "trending_hoststack.py"
+ ]
+ work_dir = "/local/csit/csit.infra.etl"
+ }
+ env {
+ AWS_ACCESS_KEY_ID = "${aws_access_key_id}"
+ AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}"
+ AWS_DEFAULT_REGION = "${aws_default_region}"
+ OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}"
+ OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+ OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}"
+ ${ envs }
+ }
+ resources {
+ cpu = ${cpu}
+ memory = ${memory}
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-trending-mrr.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-trending-mrr.hcl.tftpl
new file mode 100644
index 0000000000..47d6149eed
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-trending-mrr.hcl.tftpl
@@ -0,0 +1,53 @@
+job "${job_name}" {
+ datacenters = "${datacenters}"
+ type = "${type}"
+ periodic {
+ cron = "${cron}"
+ prohibit_overlap = "${prohibit_overlap}"
+ time_zone = "${time_zone}"
+ }
+ group "${job_name}" {
+ restart {
+ mode = "fail"
+ }
+ constraint {
+ attribute = "$${attr.cpu.arch}"
+ operator = "!="
+ value = "arm64"
+ }
+ constraint {
+ attribute = "$${node.class}"
+ value = "builder"
+ }
+ task "${job_name}" {
+ artifact {
+ source = "git::https://github.com/FDio/csit"
+ destination = "local/csit"
+ }
+ driver = "docker"
+ config {
+ image = "${image}"
+ command = "gluesparksubmit"
+ args = [
+ "--driver-memory", "30g",
+ "--executor-memory", "30g",
+ "trending_mrr.py"
+ ]
+ work_dir = "/local/csit/csit.infra.etl"
+ }
+ env {
+ AWS_ACCESS_KEY_ID = "${aws_access_key_id}"
+ AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}"
+ AWS_DEFAULT_REGION = "${aws_default_region}"
+ OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}"
+ OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+ OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}"
+ ${ envs }
+ }
+ resources {
+ cpu = ${cpu}
+ memory = ${memory}
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-trending-ndrpdr.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-trending-ndrpdr.hcl.tftpl
new file mode 100644
index 0000000000..8cd40f537e
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-trending-ndrpdr.hcl.tftpl
@@ -0,0 +1,55 @@
+job "${job_name}" {
+ datacenters = "${datacenters}"
+ type = "${type}"
+ periodic {
+ cron = "${cron}"
+ prohibit_overlap = "${prohibit_overlap}"
+ time_zone = "${time_zone}"
+ }
+ group "${job_name}" {
+ restart {
+ mode = "fail"
+ }
+ constraint {
+ attribute = "$${attr.cpu.arch}"
+ operator = "!="
+ value = "arm64"
+ }
+ constraint {
+ attribute = "$${node.class}"
+ value = "builder"
+ }
+ task "${job_name}" {
+ artifact {
+ source = "git::https://github.com/FDio/csit"
+ destination = "local/csit"
+ }
+ driver = "docker"
+ config {
+ image = "${image}"
+ command = "gluesparksubmit"
+ args = [
+ "--driver-memory", "30g",
+ "--executor-memory", "30g",
+ "--executor-cores", "2",
+ "--master", "local[2]",
+ "trending_ndrpdr.py"
+ ]
+ work_dir = "/local/csit/csit.infra.etl"
+ }
+ env {
+ AWS_ACCESS_KEY_ID = "${aws_access_key_id}"
+ AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}"
+ AWS_DEFAULT_REGION = "${aws_default_region}"
+ OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}"
+ OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+ OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}"
+ ${ envs }
+ }
+ resources {
+ cpu = ${cpu}
+ memory = ${memory}
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-trending-soak.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-trending-soak.hcl.tftpl
new file mode 100644
index 0000000000..6d77a898df
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-trending-soak.hcl.tftpl
@@ -0,0 +1,55 @@
+job "${job_name}" {
+ datacenters = "${datacenters}"
+ type = "${type}"
+ periodic {
+ cron = "${cron}"
+ prohibit_overlap = "${prohibit_overlap}"
+ time_zone = "${time_zone}"
+ }
+ group "${job_name}" {
+ restart {
+ mode = "fail"
+ }
+ constraint {
+ attribute = "$${attr.cpu.arch}"
+ operator = "!="
+ value = "arm64"
+ }
+ constraint {
+ attribute = "$${node.class}"
+ value = "builder"
+ }
+ task "${job_name}" {
+ artifact {
+ source = "git::https://github.com/FDio/csit"
+ destination = "local/csit"
+ }
+ driver = "docker"
+ config {
+ image = "${image}"
+ command = "gluesparksubmit"
+ args = [
+ "--driver-memory", "30g",
+ "--executor-memory", "30g",
+ "--executor-cores", "2",
+ "--master", "local[2]",
+ "trending_soak.py"
+ ]
+ work_dir = "/local/csit/csit.infra.etl"
+ }
+ env {
+ AWS_ACCESS_KEY_ID = "${aws_access_key_id}"
+ AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}"
+ AWS_DEFAULT_REGION = "${aws_default_region}"
+ OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}"
+ OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+ OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}"
+ ${ envs }
+ }
+ resources {
+ cpu = ${cpu}
+ memory = ${memory}
+ }
+ }
+ }
+}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/main.tf b/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/main.tf
new file mode 100644
index 0000000000..aac81d9b78
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/main.tf
@@ -0,0 +1,283 @@
+data "vault_generic_secret" "fdio_logs" {
+ path = "kv/secret/data/etl/fdio_logs"
+}
+
+data "vault_generic_secret" "fdio_docs" {
+ path = "kv/secret/data/etl/fdio_docs"
+}
+
+module "etl-stats" {
+ providers = {
+ nomad = nomad.yul1
+ }
+ source = "../"
+
+ aws_access_key_id = data.vault_generic_secret.fdio_logs.data["access_key"]
+ aws_secret_access_key = data.vault_generic_secret.fdio_logs.data["secret_key"]
+ aws_default_region = data.vault_generic_secret.fdio_logs.data["region"]
+ out_aws_access_key_id = data.vault_generic_secret.fdio_docs.data["access_key"]
+ out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
+ out_aws_default_region = data.vault_generic_secret.fdio_docs.data["region"]
+ cron = "0 30 0 * * * *"
+ datacenters = ["yul1"]
+ job_name = "etl-stats"
+}
+
+module "etl-trending-hoststack" {
+ providers = {
+ nomad = nomad.yul1
+ }
+ source = "../"
+
+ aws_access_key_id = data.vault_generic_secret.fdio_logs.data["access_key"]
+ aws_secret_access_key = data.vault_generic_secret.fdio_logs.data["secret_key"]
+ aws_default_region = data.vault_generic_secret.fdio_logs.data["region"]
+ out_aws_access_key_id = data.vault_generic_secret.fdio_docs.data["access_key"]
+ out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
+ out_aws_default_region = data.vault_generic_secret.fdio_docs.data["region"]
+ cron = "0 30 0 * * * *"
+ datacenters = ["yul1"]
+ job_name = "etl-trending-hoststack"
+}
+
+module "etl-trending-mrr" {
+ providers = {
+ nomad = nomad.yul1
+ }
+ source = "../"
+
+ aws_access_key_id = data.vault_generic_secret.fdio_logs.data["access_key"]
+ aws_secret_access_key = data.vault_generic_secret.fdio_logs.data["secret_key"]
+ aws_default_region = data.vault_generic_secret.fdio_logs.data["region"]
+ out_aws_access_key_id = data.vault_generic_secret.fdio_docs.data["access_key"]
+ out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
+ out_aws_default_region = data.vault_generic_secret.fdio_docs.data["region"]
+ cron = "0 30 0 * * * *"
+ datacenters = ["yul1"]
+ job_name = "etl-trending-mrr"
+ memory = 60000
+}
+
+module "etl-trending-ndrpdr" {
+ providers = {
+ nomad = nomad.yul1
+ }
+ source = "../"
+
+ aws_access_key_id = data.vault_generic_secret.fdio_logs.data["access_key"]
+ aws_secret_access_key = data.vault_generic_secret.fdio_logs.data["secret_key"]
+ aws_default_region = data.vault_generic_secret.fdio_logs.data["region"]
+ out_aws_access_key_id = data.vault_generic_secret.fdio_docs.data["access_key"]
+ out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
+ out_aws_default_region = data.vault_generic_secret.fdio_docs.data["region"]
+ cron = "0 30 0 * * * *"
+ datacenters = ["yul1"]
+ job_name = "etl-trending-ndrpdr"
+ memory = 60000
+}
+
+module "etl-trending-soak" {
+ providers = {
+ nomad = nomad.yul1
+ }
+ source = "../"
+
+ aws_access_key_id = data.vault_generic_secret.fdio_logs.data["access_key"]
+ aws_secret_access_key = data.vault_generic_secret.fdio_logs.data["secret_key"]
+ aws_default_region = data.vault_generic_secret.fdio_logs.data["region"]
+ out_aws_access_key_id = data.vault_generic_secret.fdio_docs.data["access_key"]
+ out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
+ out_aws_default_region = data.vault_generic_secret.fdio_docs.data["region"]
+ cron = "0 30 0 * * * *"
+ datacenters = ["yul1"]
+ job_name = "etl-trending-soak"
+ memory = 60000
+}
+
+#module "etl-iterative-hoststack-rls2402" {
+# providers = {
+# nomad = nomad.yul1
+# }
+# source = "../"
+#
+# aws_access_key_id = data.vault_generic_secret.fdio_logs.data["access_key"]
+# aws_secret_access_key = data.vault_generic_secret.fdio_logs.data["secret_key"]
+# aws_default_region = data.vault_generic_secret.fdio_logs.data["region"]
+# out_aws_access_key_id = data.vault_generic_secret.fdio_docs.data["access_key"]
+# out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
+# out_aws_default_region = data.vault_generic_secret.fdio_docs.data["region"]
+# cron = "0 30 0 * * * *"
+# datacenters = ["yul1"]
+# job_name = "etl-iterative-hoststack-rls2402"
+#}
+#
+#module "etl-iterative-mrr-rls2402" {
+# providers = {
+# nomad = nomad.yul1
+# }
+# source = "../"
+#
+# aws_access_key_id = data.vault_generic_secret.fdio_logs.data["access_key"]
+# aws_secret_access_key = data.vault_generic_secret.fdio_logs.data["secret_key"]
+# aws_default_region = data.vault_generic_secret.fdio_logs.data["region"]
+# out_aws_access_key_id = data.vault_generic_secret.fdio_docs.data["access_key"]
+# out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
+# out_aws_default_region = data.vault_generic_secret.fdio_docs.data["region"]
+# cron = "0 30 0 * * * *"
+# datacenters = ["yul1"]
+# job_name = "etl-iterative-mrr-rls2402"
+#}
+#
+#module "etl-iterative-ndrpdr-rls2402" {
+# providers = {
+# nomad = nomad.yul1
+# }
+# source = "../"
+#
+# aws_access_key_id = data.vault_generic_secret.fdio_logs.data["access_key"]
+# aws_secret_access_key = data.vault_generic_secret.fdio_logs.data["secret_key"]
+# aws_default_region = data.vault_generic_secret.fdio_logs.data["region"]
+# out_aws_access_key_id = data.vault_generic_secret.fdio_docs.data["access_key"]
+# out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
+# out_aws_default_region = data.vault_generic_secret.fdio_docs.data["region"]
+# cron = "0 30 0 * * * *"
+# datacenters = ["yul1"]
+# job_name = "etl-iterative-ndrpdr-rls2402"
+#}
+#
+#module "etl-iterative-reconf-rls2402" {
+# providers = {
+# nomad = nomad.yul1
+# }
+# source = "../"
+#
+# aws_access_key_id = data.vault_generic_secret.fdio_logs.data["access_key"]
+# aws_secret_access_key = data.vault_generic_secret.fdio_logs.data["secret_key"]
+# aws_default_region = data.vault_generic_secret.fdio_logs.data["region"]
+# out_aws_access_key_id = data.vault_generic_secret.fdio_docs.data["access_key"]
+# out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
+# out_aws_default_region = data.vault_generic_secret.fdio_docs.data["region"]
+# cron = "0 30 0 * * * *"
+# datacenters = ["yul1"]
+# job_name = "etl-iterative-reconf-rls2402"
+#}
+#
+#module "etl-iterative-soak-rls2402" {
+# providers = {
+# nomad = nomad.yul1
+# }
+# source = "../"
+#
+# aws_access_key_id = data.vault_generic_secret.fdio_logs.data["access_key"]
+# aws_secret_access_key = data.vault_generic_secret.fdio_logs.data["secret_key"]
+# aws_default_region = data.vault_generic_secret.fdio_logs.data["region"]
+# out_aws_access_key_id = data.vault_generic_secret.fdio_docs.data["access_key"]
+# out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
+# out_aws_default_region = data.vault_generic_secret.fdio_docs.data["region"]
+# cron = "0 30 0 * * * *"
+# datacenters = ["yul1"]
+# job_name = "etl-iterative-soak-rls2402"
+#}
+#
+#module "etl-coverage-device-rls2402" {
+# providers = {
+# nomad = nomad.yul1
+# }
+# source = "../"
+#
+# aws_access_key_id = data.vault_generic_secret.fdio_logs.data["access_key"]
+# aws_secret_access_key = data.vault_generic_secret.fdio_logs.data["secret_key"]
+# aws_default_region = data.vault_generic_secret.fdio_logs.data["region"]
+# out_aws_access_key_id = data.vault_generic_secret.fdio_docs.data["access_key"]
+# out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
+# out_aws_default_region = data.vault_generic_secret.fdio_docs.data["region"]
+# cron = "0 30 0 * * * *"
+# datacenters = ["yul1"]
+# job_name = "etl-coverage-device-rls2402"
+#}
+#
+#module "etl-coverage-hoststack-rls2402" {
+# providers = {
+# nomad = nomad.yul1
+# }
+# source = "../"
+#
+# aws_access_key_id = data.vault_generic_secret.fdio_logs.data["access_key"]
+# aws_secret_access_key = data.vault_generic_secret.fdio_logs.data["secret_key"]
+# aws_default_region = data.vault_generic_secret.fdio_logs.data["region"]
+# out_aws_access_key_id = data.vault_generic_secret.fdio_docs.data["access_key"]
+# out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
+# out_aws_default_region = data.vault_generic_secret.fdio_docs.data["region"]
+# cron = "0 30 0 * * * *"
+# datacenters = ["yul1"]
+# job_name = "etl-coverage-hoststack-rls2402"
+#}
+#
+#module "etl-coverage-mrr-rls2402" {
+# providers = {
+# nomad = nomad.yul1
+# }
+# source = "../"
+#
+# aws_access_key_id = data.vault_generic_secret.fdio_logs.data["access_key"]
+# aws_secret_access_key = data.vault_generic_secret.fdio_logs.data["secret_key"]
+# aws_default_region = data.vault_generic_secret.fdio_logs.data["region"]
+# out_aws_access_key_id = data.vault_generic_secret.fdio_docs.data["access_key"]
+# out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
+# out_aws_default_region = data.vault_generic_secret.fdio_docs.data["region"]
+# cron = "0 30 0 * * * *"
+# datacenters = ["yul1"]
+# job_name = "etl-coverage-mrr-rls2402"
+#}
+#
+#module "etl-coverage-ndrpdr-rls2402" {
+# providers = {
+# nomad = nomad.yul1
+# }
+# source = "../"
+#
+# aws_access_key_id = data.vault_generic_secret.fdio_logs.data["access_key"]
+# aws_secret_access_key = data.vault_generic_secret.fdio_logs.data["secret_key"]
+# aws_default_region = data.vault_generic_secret.fdio_logs.data["region"]
+# out_aws_access_key_id = data.vault_generic_secret.fdio_docs.data["access_key"]
+# out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
+# out_aws_default_region = data.vault_generic_secret.fdio_docs.data["region"]
+# cron = "0 30 0 * * * *"
+# datacenters = ["yul1"]
+# job_name = "etl-coverage-ndrpdr-rls2402"
+#}
+#
+#module "etl-coverage-reconf-rls2402" {
+# providers = {
+# nomad = nomad.yul1
+# }
+# source = "../"
+#
+# aws_access_key_id = data.vault_generic_secret.fdio_logs.data["access_key"]
+# aws_secret_access_key = data.vault_generic_secret.fdio_logs.data["secret_key"]
+# aws_default_region = data.vault_generic_secret.fdio_logs.data["region"]
+# out_aws_access_key_id = data.vault_generic_secret.fdio_docs.data["access_key"]
+# out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
+# out_aws_default_region = data.vault_generic_secret.fdio_docs.data["region"]
+# cron = "0 30 0 * * * *"
+# datacenters = ["yul1"]
+# job_name = "etl-coverage-reconf-rls2402"
+#}
+#
+#module "etl-coverage-soak-rls2402" {
+# providers = {
+# nomad = nomad.yul1
+# }
+# source = "../"
+#
+# aws_access_key_id = data.vault_generic_secret.fdio_logs.data["access_key"]
+# aws_secret_access_key = data.vault_generic_secret.fdio_logs.data["secret_key"]
+# aws_default_region = data.vault_generic_secret.fdio_logs.data["region"]
+# out_aws_access_key_id = data.vault_generic_secret.fdio_docs.data["access_key"]
+# out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
+# out_aws_default_region = data.vault_generic_secret.fdio_docs.data["region"]
+# cron = "0 30 0 * * * *"
+# datacenters = ["yul1"]
+# job_name = "etl-coverage-soak-rls2402"
+#}
+# \ No newline at end of file
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/providers.tf b/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/providers.tf
new file mode 100644
index 0000000000..c6617da02b
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/providers.tf
@@ -0,0 +1,13 @@
+provider "nomad" {
+ address = var.nomad_provider_address
+ alias = "yul1"
+ # ca_file = var.nomad_provider_ca_file
+ # cert_file = var.nomad_provider_cert_file
+ # key_file = var.nomad_provider_key_file
+}
+
+provider "vault" {
+ address = var.vault_provider_address
+ skip_tls_verify = var.vault_provider_skip_tls_verify
+ token = var.vault_provider_token
+}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/variables.tf b/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/variables.tf
new file mode 100644
index 0000000000..db24bdf0fa
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/variables.tf
@@ -0,0 +1,47 @@
+variable "nomad_acl" {
+ description = "Nomad ACLs enabled/disabled."
+ type = bool
+ default = false
+}
+
+variable "nomad_provider_address" {
+ description = "FD.io Nomad cluster address."
+ type = string
+ default = "http://10.30.51.23:4646"
+}
+
+variable "nomad_provider_ca_file" {
+ description = "A local file path to a PEM-encoded certificate authority."
+ type = string
+ default = "/etc/nomad.d/ssl/nomad-ca.pem"
+}
+
+variable "nomad_provider_cert_file" {
+ description = "A local file path to a PEM-encoded certificate."
+ type = string
+ default = "/etc/nomad.d/ssl/nomad-cli.pem"
+}
+
+variable "nomad_provider_key_file" {
+ description = "A local file path to a PEM-encoded private key."
+ type = string
+ default = "/etc/nomad.d/ssl/nomad-cli-key.pem"
+}
+
+variable "vault_provider_address" {
+ description = "Vault cluster address."
+ type = string
+ default = "http://10.30.51.23:8200"
+}
+
+variable "vault_provider_skip_tls_verify" {
+ description = "Verification of the Vault server's TLS certificate."
+ type = bool
+ default = false
+}
+
+variable "vault_provider_token" {
+ description = "Vault root token."
+ type = string
+ sensitive = true
+}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/versions.tf b/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/versions.tf
new file mode 100644
index 0000000000..0c05e76d65
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/versions.tf
@@ -0,0 +1,17 @@
+terraform {
+ backend "consul" {
+ address = "10.30.51.23:8500"
+ scheme = "http"
+ path = "terraform/etl"
+ }
+ required_providers {
+ nomad = {
+ source = "hashicorp/nomad"
+ version = ">= 1.4.20"
+ }
+ vault = {
+ version = ">= 3.12.0"
+ }
+ }
+ required_version = ">= 1.5.4"
+}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/main.tf b/fdio.infra.terraform/terraform-nomad-pyspark-etl/main.tf
new file mode 100644
index 0000000000..cd6a9a52ff
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/main.tf
@@ -0,0 +1,33 @@
+locals {
+ datacenters = join(",", var.datacenters)
+ envs = join("\n", concat([], var.envs))
+}
+
+resource "nomad_job" "nomad_job" {
+ jobspec = templatefile(
+ "${path.module}/conf/nomad/${var.job_name}.hcl.tftpl",
+ {
+ aws_access_key_id = var.aws_access_key_id,
+ aws_secret_access_key = var.aws_secret_access_key,
+ aws_default_region = var.aws_default_region
+ cpu = var.cpu,
+ cron = var.cron,
+ datacenters = local.datacenters,
+ envs = local.envs,
+ image = var.image,
+ job_name = var.job_name,
+ memory = var.memory,
+ out_aws_access_key_id = var.out_aws_access_key_id,
+ out_aws_secret_access_key = var.out_aws_secret_access_key,
+ out_aws_default_region = var.out_aws_default_region
+ prohibit_overlap = var.prohibit_overlap,
+ time_zone = var.time_zone,
+ type = var.type,
+ use_vault_provider = var.vault_secret.use_vault_provider,
+ vault_kv_policy_name = var.vault_secret.vault_kv_policy_name,
+ vault_kv_path = var.vault_secret.vault_kv_path,
+ vault_kv_field_access_key = var.vault_secret.vault_kv_field_access_key,
+ vault_kv_field_secret_key = var.vault_secret.vault_kv_field_secret_key
+ })
+ detach = false
+}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/variables.tf b/fdio.infra.terraform/terraform-nomad-pyspark-etl/variables.tf
new file mode 100644
index 0000000000..f6d318e855
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/variables.tf
@@ -0,0 +1,115 @@
+# Nomad
+variable "datacenters" {
+ description = "Specifies the list of DCs to be considered placing this task."
+ type = list(string)
+ default = ["dc1"]
+}
+
+# ETL
+variable "aws_access_key_id" {
+ description = "AWS access key."
+ type = string
+ default = "aws"
+}
+
+variable "aws_secret_access_key" {
+ description = "AWS secret key"
+ type = string
+ default = "aws"
+}
+
+variable "aws_default_region" {
+ description = "AWS region"
+ type = string
+ default = "aws"
+}
+
+variable "cpu" {
+ description = "Specifies the CPU required to run this task in MHz."
+ type = number
+ default = 10000
+}
+
+variable "cron" {
+ description = "Specifies a cron expression configuring the interval to launch."
+ type = string
+ default = "@daily"
+}
+
+variable "envs" {
+ description = "Specifies ETL environment variables."
+ type = list(string)
+ default = []
+}
+
+variable "image" {
+ description = "Specifies the Docker image to run."
+ type = string
+ default = "pmikus/docker-ubuntu-focal-aws-glue:latest"
+}
+
+variable "job_name" {
+ description = "Specifies a name for the job."
+ type = string
+ default = "etl"
+}
+
+variable "memory" {
+ description = "Specifies the memory required in MB."
+ type = number
+ default = 50000
+}
+
+variable "out_aws_access_key_id" {
+ description = "AWS access key."
+ type = string
+ default = "aws"
+}
+
+variable "out_aws_secret_access_key" {
+ description = "AWS secret key"
+ type = string
+ default = "aws"
+}
+
+variable "out_aws_default_region" {
+ description = "AWS region"
+ type = string
+ default = "aws"
+}
+
+variable "prohibit_overlap" {
+ description = "Specifies if this job should wait until previous completed."
+ type = bool
+ default = true
+}
+
+variable "time_zone" {
+ description = "Specifies the time zone to evaluate the next launch interval."
+ type = string
+ default = "UTC"
+}
+
+variable "type" {
+ description = "Specifies the Nomad scheduler to use."
+ type = string
+ default = "batch"
+}
+
+variable "vault_secret" {
+ type = object({
+ use_vault_provider = bool,
+ vault_kv_policy_name = string,
+ vault_kv_path = string,
+ vault_kv_field_access_key = string,
+ vault_kv_field_secret_key = string
+ })
+ description = "Set of properties to be able to fetch secret from vault."
+ default = {
+ use_vault_provider = false
+ vault_kv_policy_name = "kv"
+ vault_kv_path = "secret/data/etl"
+ vault_kv_field_access_key = "access_key"
+ vault_kv_field_secret_key = "secret_key"
+ }
+}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/versions.tf b/fdio.infra.terraform/terraform-nomad-pyspark-etl/versions.tf
new file mode 100644
index 0000000000..f40435fe77
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/versions.tf
@@ -0,0 +1,9 @@
+terraform {
+ required_providers {
+ nomad = {
+ source = "hashicorp/nomad"
+ version = ">= 1.4.20"
+ }
+ }
+ required_version = ">= 1.5.4"
+}