diff options
Diffstat (limited to 'fdio.infra.terraform/terraform-nomad-pyspark-etl')
24 files changed, 1441 insertions, 0 deletions
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/README.md b/fdio.infra.terraform/terraform-nomad-pyspark-etl/README.md new file mode 100644 index 0000000000..d61c8778d4 --- /dev/null +++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/README.md @@ -0,0 +1,50 @@ +<!-- BEGIN_TF_DOCS --> +## Requirements + +| Name | Version | +|------|---------| +| <a name="requirement_terraform"></a> [terraform](#requirement\_terraform) | >= 1.5.4 | +| <a name="requirement_nomad"></a> [nomad](#requirement\_nomad) | >= 1.4.20 | + +## Providers + +| Name | Version | +|------|---------| +| <a name="provider_nomad"></a> [nomad](#provider\_nomad) | >= 1.4.20 | + +## Modules + +No modules. + +## Resources + +| Name | Type | +|------|------| +| [nomad_job.nomad_job](https://registry.terraform.io/providers/hashicorp/nomad/latest/docs/resources/job) | resource | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| <a name="input_aws_access_key_id"></a> [aws\_access\_key\_id](#input\_aws\_access\_key\_id) | AWS access key. | `string` | `"aws"` | no | +| <a name="input_aws_default_region"></a> [aws\_default\_region](#input\_aws\_default\_region) | AWS region | `string` | `"aws"` | no | +| <a name="input_aws_secret_access_key"></a> [aws\_secret\_access\_key](#input\_aws\_secret\_access\_key) | AWS secret key | `string` | `"aws"` | no | +| <a name="input_cpu"></a> [cpu](#input\_cpu) | Specifies the CPU required to run this task in MHz. | `number` | `10000` | no | +| <a name="input_cron"></a> [cron](#input\_cron) | Specifies a cron expression configuring the interval to launch. | `string` | `"@daily"` | no | +| <a name="input_datacenters"></a> [datacenters](#input\_datacenters) | Specifies the list of DCs to be considered placing this task. | `list(string)` | <pre>[<br> "dc1"<br>]</pre> | no | +| <a name="input_envs"></a> [envs](#input\_envs) | Specifies ETL environment variables. 
| `list(string)` | `[]` | no | +| <a name="input_image"></a> [image](#input\_image) | Specifies the Docker image to run. | `string` | `"pmikus/docker-ubuntu-focal-aws-glue:latest"` | no | +| <a name="input_job_name"></a> [job\_name](#input\_job\_name) | Specifies a name for the job. | `string` | `"etl"` | no | +| <a name="input_memory"></a> [memory](#input\_memory) | Specifies the memory required in MB. | `number` | `50000` | no | +| <a name="input_out_aws_access_key_id"></a> [out\_aws\_access\_key\_id](#input\_out\_aws\_access\_key\_id) | AWS access key. | `string` | `"aws"` | no | +| <a name="input_out_aws_default_region"></a> [out\_aws\_default\_region](#input\_out\_aws\_default\_region) | AWS region | `string` | `"aws"` | no | +| <a name="input_out_aws_secret_access_key"></a> [out\_aws\_secret\_access\_key](#input\_out\_aws\_secret\_access\_key) | AWS secret key | `string` | `"aws"` | no | +| <a name="input_prohibit_overlap"></a> [prohibit\_overlap](#input\_prohibit\_overlap) | Specifies if this job should wait until previous completed. | `bool` | `true` | no | +| <a name="input_time_zone"></a> [time\_zone](#input\_time\_zone) | Specifies the time zone to evaluate the next launch interval. | `string` | `"UTC"` | no | +| <a name="input_type"></a> [type](#input\_type) | Specifies the Nomad scheduler to use. | `string` | `"batch"` | no | +| <a name="input_vault_secret"></a> [vault\_secret](#input\_vault\_secret) | Set of properties to be able to fetch secret from vault. | <pre>object({<br> use_vault_provider = bool,<br> vault_kv_policy_name = string,<br> vault_kv_path = string,<br> vault_kv_field_access_key = string,<br> vault_kv_field_secret_key = string<br> })</pre> | <pre>{<br> "use_vault_provider": false,<br> "vault_kv_field_access_key": "access_key",<br> "vault_kv_field_secret_key": "secret_key",<br> "vault_kv_path": "secret/data/etl",<br> "vault_kv_policy_name": "kv"<br>}</pre> | no | + +## Outputs + +No outputs. +<!-- END_TF_DOCS -->
\ No newline at end of file diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-device-rls2402.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-device-rls2402.hcl.tftpl new file mode 100644 index 0000000000..cc0b1df8b5 --- /dev/null +++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-device-rls2402.hcl.tftpl @@ -0,0 +1,55 @@ +job "${job_name}" { + datacenters = "${datacenters}" + type = "${type}" + periodic { + cron = "${cron}" + prohibit_overlap = "${prohibit_overlap}" + time_zone = "${time_zone}" + } + group "${job_name}" { + restart { + mode = "fail" + } + constraint { + attribute = "$${attr.cpu.arch}" + operator = "!=" + value = "arm64" + } + constraint { + attribute = "$${node.class}" + value = "builder" + } + task "${job_name}" { + artifact { + source = "git::https://github.com/FDio/csit" + destination = "local/csit" + } + driver = "docker" + config { + image = "${image}" + command = "gluesparksubmit" + args = [ + "--driver-memory", "20g", + "--executor-memory", "20g", + "--executor-cores", "2", + "--master", "local[2]", + "coverage_device_rls2402.py" + ] + work_dir = "/local/csit/csit.infra.etl" + } + env { + AWS_ACCESS_KEY_ID = "${aws_access_key_id}" + AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}" + AWS_DEFAULT_REGION = "${aws_default_region}" + OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}" + OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}" + OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}" + ${ envs } + } + resources { + cpu = ${cpu} + memory = ${memory} + } + } + } +}
\ No newline at end of file diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-hoststack-rls2402.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-hoststack-rls2402.hcl.tftpl new file mode 100644 index 0000000000..95d7a4c46e --- /dev/null +++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-hoststack-rls2402.hcl.tftpl @@ -0,0 +1,55 @@ +job "${job_name}" { + datacenters = "${datacenters}" + type = "${type}" + periodic { + cron = "${cron}" + prohibit_overlap = "${prohibit_overlap}" + time_zone = "${time_zone}" + } + group "${job_name}" { + restart { + mode = "fail" + } + constraint { + attribute = "$${attr.cpu.arch}" + operator = "!=" + value = "arm64" + } + constraint { + attribute = "$${node.class}" + value = "builder" + } + task "${job_name}" { + artifact { + source = "git::https://github.com/FDio/csit" + destination = "local/csit" + } + driver = "docker" + config { + image = "${image}" + command = "gluesparksubmit" + args = [ + "--driver-memory", "20g", + "--executor-memory", "20g", + "--executor-cores", "2", + "--master", "local[2]", + "coverage_hoststack_rls2402.py" + ] + work_dir = "/local/csit/csit.infra.etl" + } + env { + AWS_ACCESS_KEY_ID = "${aws_access_key_id}" + AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}" + AWS_DEFAULT_REGION = "${aws_default_region}" + OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}" + OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}" + OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}" + ${ envs } + } + resources { + cpu = ${cpu} + memory = ${memory} + } + } + } +}
\ No newline at end of file diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-mrr-rls2402.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-mrr-rls2402.hcl.tftpl new file mode 100644 index 0000000000..3bab9264fa --- /dev/null +++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-mrr-rls2402.hcl.tftpl @@ -0,0 +1,55 @@ +job "${job_name}" { + datacenters = "${datacenters}" + type = "${type}" + periodic { + cron = "${cron}" + prohibit_overlap = "${prohibit_overlap}" + time_zone = "${time_zone}" + } + group "${job_name}" { + restart { + mode = "fail" + } + constraint { + attribute = "$${attr.cpu.arch}" + operator = "!=" + value = "arm64" + } + constraint { + attribute = "$${node.class}" + value = "builder" + } + task "${job_name}" { + artifact { + source = "git::https://github.com/FDio/csit" + destination = "local/csit" + } + driver = "docker" + config { + image = "${image}" + command = "gluesparksubmit" + args = [ + "--driver-memory", "20g", + "--executor-memory", "20g", + "--executor-cores", "2", + "--master", "local[2]", + "coverage_mrr_rls2402.py" + ] + work_dir = "/local/csit/csit.infra.etl" + } + env { + AWS_ACCESS_KEY_ID = "${aws_access_key_id}" + AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}" + AWS_DEFAULT_REGION = "${aws_default_region}" + OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}" + OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}" + OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}" + ${ envs } + } + resources { + cpu = ${cpu} + memory = ${memory} + } + } + } +}
\ No newline at end of file diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-ndrpdr-rls2402.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-ndrpdr-rls2402.hcl.tftpl new file mode 100644 index 0000000000..6142219546 --- /dev/null +++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-ndrpdr-rls2402.hcl.tftpl @@ -0,0 +1,55 @@ +job "${job_name}" { + datacenters = "${datacenters}" + type = "${type}" + periodic { + cron = "${cron}" + prohibit_overlap = "${prohibit_overlap}" + time_zone = "${time_zone}" + } + group "${job_name}" { + restart { + mode = "fail" + } + constraint { + attribute = "$${attr.cpu.arch}" + operator = "!=" + value = "arm64" + } + constraint { + attribute = "$${node.class}" + value = "builder" + } + task "${job_name}" { + artifact { + source = "git::https://github.com/FDio/csit" + destination = "local/csit" + } + driver = "docker" + config { + image = "${image}" + command = "gluesparksubmit" + args = [ + "--driver-memory", "20g", + "--executor-memory", "20g", + "--executor-cores", "2", + "--master", "local[2]", + "coverage_ndrpdr_rls2402.py" + ] + work_dir = "/local/csit/csit.infra.etl" + } + env { + AWS_ACCESS_KEY_ID = "${aws_access_key_id}" + AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}" + AWS_DEFAULT_REGION = "${aws_default_region}" + OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}" + OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}" + OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}" + ${ envs } + } + resources { + cpu = ${cpu} + memory = ${memory} + } + } + } +}
\ No newline at end of file diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-reconf-rls2402.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-reconf-rls2402.hcl.tftpl new file mode 100644 index 0000000000..b474e75217 --- /dev/null +++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-reconf-rls2402.hcl.tftpl @@ -0,0 +1,55 @@ +job "${job_name}" { + datacenters = "${datacenters}" + type = "${type}" + periodic { + cron = "${cron}" + prohibit_overlap = "${prohibit_overlap}" + time_zone = "${time_zone}" + } + group "${job_name}" { + restart { + mode = "fail" + } + constraint { + attribute = "$${attr.cpu.arch}" + operator = "!=" + value = "arm64" + } + constraint { + attribute = "$${node.class}" + value = "builder" + } + task "${job_name}" { + artifact { + source = "git::https://github.com/FDio/csit" + destination = "local/csit" + } + driver = "docker" + config { + image = "${image}" + command = "gluesparksubmit" + args = [ + "--driver-memory", "20g", + "--executor-memory", "20g", + "--executor-cores", "2", + "--master", "local[2]", + "coverage_reconf_rls2402.py" + ] + work_dir = "/local/csit/csit.infra.etl" + } + env { + AWS_ACCESS_KEY_ID = "${aws_access_key_id}" + AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}" + AWS_DEFAULT_REGION = "${aws_default_region}" + OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}" + OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}" + OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}" + ${ envs } + } + resources { + cpu = ${cpu} + memory = ${memory} + } + } + } +}
\ No newline at end of file diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-soak-rls2402.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-soak-rls2402.hcl.tftpl new file mode 100644 index 0000000000..0352e1e879 --- /dev/null +++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-coverage-soak-rls2402.hcl.tftpl @@ -0,0 +1,55 @@ +job "${job_name}" { + datacenters = "${datacenters}" + type = "${type}" + periodic { + cron = "${cron}" + prohibit_overlap = "${prohibit_overlap}" + time_zone = "${time_zone}" + } + group "${job_name}" { + restart { + mode = "fail" + } + constraint { + attribute = "$${attr.cpu.arch}" + operator = "!=" + value = "arm64" + } + constraint { + attribute = "$${node.class}" + value = "builder" + } + task "${job_name}" { + artifact { + source = "git::https://github.com/FDio/csit" + destination = "local/csit" + } + driver = "docker" + config { + image = "${image}" + command = "gluesparksubmit" + args = [ + "--driver-memory", "20g", + "--executor-memory", "20g", + "--executor-cores", "2", + "--master", "local[2]", + "coverage_soak_rls2402.py" + ] + work_dir = "/local/csit/csit.infra.etl" + } + env { + AWS_ACCESS_KEY_ID = "${aws_access_key_id}" + AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}" + AWS_DEFAULT_REGION = "${aws_default_region}" + OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}" + OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}" + OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}" + ${ envs } + } + resources { + cpu = ${cpu} + memory = ${memory} + } + } + } +}
\ No newline at end of file diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-hoststack-rls2402.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-hoststack-rls2402.hcl.tftpl new file mode 100644 index 0000000000..74478c59f7 --- /dev/null +++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-hoststack-rls2402.hcl.tftpl @@ -0,0 +1,55 @@ +job "${job_name}" { + datacenters = "${datacenters}" + type = "${type}" + periodic { + cron = "${cron}" + prohibit_overlap = "${prohibit_overlap}" + time_zone = "${time_zone}" + } + group "${job_name}" { + restart { + mode = "fail" + } + constraint { + attribute = "$${attr.cpu.arch}" + operator = "!=" + value = "arm64" + } + constraint { + attribute = "$${node.class}" + value = "builder" + } + task "${job_name}" { + artifact { + source = "git::https://github.com/FDio/csit" + destination = "local/csit" + } + driver = "docker" + config { + image = "${image}" + command = "gluesparksubmit" + args = [ + "--driver-memory", "20g", + "--executor-memory", "20g", + "--executor-cores", "2", + "--master", "local[2]", + "iterative_hoststack_rls2402.py" + ] + work_dir = "/local/csit/csit.infra.etl" + } + env { + AWS_ACCESS_KEY_ID = "${aws_access_key_id}" + AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}" + AWS_DEFAULT_REGION = "${aws_default_region}" + OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}" + OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}" + OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}" + ${ envs } + } + resources { + cpu = ${cpu} + memory = ${memory} + } + } + } +} diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-mrr-rls2402.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-mrr-rls2402.hcl.tftpl new file mode 100644 index 0000000000..e6bd87b8ed --- /dev/null +++ 
b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-mrr-rls2402.hcl.tftpl @@ -0,0 +1,55 @@ +job "${job_name}" { + datacenters = "${datacenters}" + type = "${type}" + periodic { + cron = "${cron}" + prohibit_overlap = "${prohibit_overlap}" + time_zone = "${time_zone}" + } + group "${job_name}" { + restart { + mode = "fail" + } + constraint { + attribute = "$${attr.cpu.arch}" + operator = "!=" + value = "arm64" + } + constraint { + attribute = "$${node.class}" + value = "builder" + } + task "${job_name}" { + artifact { + source = "git::https://github.com/FDio/csit" + destination = "local/csit" + } + driver = "docker" + config { + image = "${image}" + command = "gluesparksubmit" + args = [ + "--driver-memory", "20g", + "--executor-memory", "20g", + "--executor-cores", "2", + "--master", "local[2]", + "iterative_mrr_rls2402.py" + ] + work_dir = "/local/csit/csit.infra.etl" + } + env { + AWS_ACCESS_KEY_ID = "${aws_access_key_id}" + AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}" + AWS_DEFAULT_REGION = "${aws_default_region}" + OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}" + OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}" + OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}" + ${ envs } + } + resources { + cpu = ${cpu} + memory = ${memory} + } + } + } +} diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-ndrpdr-rls2402.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-ndrpdr-rls2402.hcl.tftpl new file mode 100644 index 0000000000..4a40321377 --- /dev/null +++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-ndrpdr-rls2402.hcl.tftpl @@ -0,0 +1,55 @@ +job "${job_name}" { + datacenters = "${datacenters}" + type = "${type}" + periodic { + cron = "${cron}" + prohibit_overlap = "${prohibit_overlap}" + time_zone = "${time_zone}" + } + group "${job_name}" { + restart { + mode = "fail" + } + constraint { + attribute = "$${attr.cpu.arch}" + 
operator = "!=" + value = "arm64" + } + constraint { + attribute = "$${node.class}" + value = "builder" + } + task "${job_name}" { + artifact { + source = "git::https://github.com/FDio/csit" + destination = "local/csit" + } + driver = "docker" + config { + image = "${image}" + command = "gluesparksubmit" + args = [ + "--driver-memory", "20g", + "--executor-memory", "20g", + "--executor-cores", "2", + "--master", "local[2]", + "iterative_ndrpdr_rls2402.py" + ] + work_dir = "/local/csit/csit.infra.etl" + } + env { + AWS_ACCESS_KEY_ID = "${aws_access_key_id}" + AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}" + AWS_DEFAULT_REGION = "${aws_default_region}" + OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}" + OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}" + OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}" + ${ envs } + } + resources { + cpu = ${cpu} + memory = ${memory} + } + } + } +} diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-reconf-rls2402.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-reconf-rls2402.hcl.tftpl new file mode 100644 index 0000000000..670dd37a11 --- /dev/null +++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-reconf-rls2402.hcl.tftpl @@ -0,0 +1,55 @@ +job "${job_name}" { + datacenters = "${datacenters}" + type = "${type}" + periodic { + cron = "${cron}" + prohibit_overlap = "${prohibit_overlap}" + time_zone = "${time_zone}" + } + group "${job_name}" { + restart { + mode = "fail" + } + constraint { + attribute = "$${attr.cpu.arch}" + operator = "!=" + value = "arm64" + } + constraint { + attribute = "$${node.class}" + value = "builder" + } + task "${job_name}" { + artifact { + source = "git::https://github.com/FDio/csit" + destination = "local/csit" + } + driver = "docker" + config { + image = "${image}" + command = "gluesparksubmit" + args = [ + "--driver-memory", "20g", + "--executor-memory", "20g", + "--executor-cores", 
"2", + "--master", "local[2]", + "iterative_reconf_rls2402.py" + ] + work_dir = "/local/csit/csit.infra.etl" + } + env { + AWS_ACCESS_KEY_ID = "${aws_access_key_id}" + AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}" + AWS_DEFAULT_REGION = "${aws_default_region}" + OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}" + OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}" + OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}" + ${ envs } + } + resources { + cpu = ${cpu} + memory = ${memory} + } + } + } +} diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-soak-rls2402.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-soak-rls2402.hcl.tftpl new file mode 100644 index 0000000000..c4ad363879 --- /dev/null +++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-iterative-soak-rls2402.hcl.tftpl @@ -0,0 +1,55 @@ +job "${job_name}" { + datacenters = "${datacenters}" + type = "${type}" + periodic { + cron = "${cron}" + prohibit_overlap = "${prohibit_overlap}" + time_zone = "${time_zone}" + } + group "${job_name}" { + restart { + mode = "fail" + } + constraint { + attribute = "$${attr.cpu.arch}" + operator = "!=" + value = "arm64" + } + constraint { + attribute = "$${node.class}" + value = "builder" + } + task "${job_name}" { + artifact { + source = "git::https://github.com/FDio/csit" + destination = "local/csit" + } + driver = "docker" + config { + image = "${image}" + command = "gluesparksubmit" + args = [ + "--driver-memory", "20g", + "--executor-memory", "20g", + "--executor-cores", "2", + "--master", "local[2]", + "iterative_soak_rls2402.py" + ] + work_dir = "/local/csit/csit.infra.etl" + } + env { + AWS_ACCESS_KEY_ID = "${aws_access_key_id}" + AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}" + AWS_DEFAULT_REGION = "${aws_default_region}" + OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}" + OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}" + OUT_AWS_DEFAULT_REGION = 
"${out_aws_default_region}" + ${ envs } + } + resources { + cpu = ${cpu} + memory = ${memory} + } + } + } +} diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-stats.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-stats.hcl.tftpl new file mode 100644 index 0000000000..86ca584de7 --- /dev/null +++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-stats.hcl.tftpl @@ -0,0 +1,53 @@ +job "${job_name}" { + datacenters = "${datacenters}" + type = "${type}" + periodic { + cron = "${cron}" + prohibit_overlap = "${prohibit_overlap}" + time_zone = "${time_zone}" + } + group "${job_name}" { + restart { + mode = "fail" + } + constraint { + attribute = "$${attr.cpu.arch}" + operator = "!=" + value = "arm64" + } + constraint { + attribute = "$${node.class}" + value = "builder" + } + task "${job_name}" { + artifact { + source = "git::https://github.com/FDio/csit" + destination = "local/csit" + } + driver = "docker" + config { + image = "${image}" + command = "gluesparksubmit" + args = [ + "--driver-memory", "10g", + "--executor-memory", "10g", + "stats.py" + ] + work_dir = "/local/csit/csit.infra.etl" + } + env { + AWS_ACCESS_KEY_ID = "${aws_access_key_id}" + AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}" + AWS_DEFAULT_REGION = "${aws_default_region}" + OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}" + OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}" + OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}" + ${ envs } + } + resources { + cpu = ${cpu} + memory = ${memory} + } + } + } +} diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-trending-hoststack.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-trending-hoststack.hcl.tftpl new file mode 100644 index 0000000000..24aa4095d2 --- /dev/null +++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-trending-hoststack.hcl.tftpl @@ -0,0 +1,53 @@ +job "${job_name}" { + datacenters = 
"${datacenters}" + type = "${type}" + periodic { + cron = "${cron}" + prohibit_overlap = "${prohibit_overlap}" + time_zone = "${time_zone}" + } + group "${job_name}" { + restart { + mode = "fail" + } + constraint { + attribute = "$${attr.cpu.arch}" + operator = "!=" + value = "arm64" + } + constraint { + attribute = "$${node.class}" + value = "builder" + } + task "${job_name}" { + artifact { + source = "git::https://github.com/FDio/csit" + destination = "local/csit" + } + driver = "docker" + config { + image = "${image}" + command = "gluesparksubmit" + args = [ + "--driver-memory", "30g", + "--executor-memory", "30g", + "trending_hoststack.py" + ] + work_dir = "/local/csit/csit.infra.etl" + } + env { + AWS_ACCESS_KEY_ID = "${aws_access_key_id}" + AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}" + AWS_DEFAULT_REGION = "${aws_default_region}" + OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}" + OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}" + OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}" + ${ envs } + } + resources { + cpu = ${cpu} + memory = ${memory} + } + } + } +}
\ No newline at end of file diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-trending-mrr.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-trending-mrr.hcl.tftpl new file mode 100644 index 0000000000..47d6149eed --- /dev/null +++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-trending-mrr.hcl.tftpl @@ -0,0 +1,53 @@ +job "${job_name}" { + datacenters = "${datacenters}" + type = "${type}" + periodic { + cron = "${cron}" + prohibit_overlap = "${prohibit_overlap}" + time_zone = "${time_zone}" + } + group "${job_name}" { + restart { + mode = "fail" + } + constraint { + attribute = "$${attr.cpu.arch}" + operator = "!=" + value = "arm64" + } + constraint { + attribute = "$${node.class}" + value = "builder" + } + task "${job_name}" { + artifact { + source = "git::https://github.com/FDio/csit" + destination = "local/csit" + } + driver = "docker" + config { + image = "${image}" + command = "gluesparksubmit" + args = [ + "--driver-memory", "30g", + "--executor-memory", "30g", + "trending_mrr.py" + ] + work_dir = "/local/csit/csit.infra.etl" + } + env { + AWS_ACCESS_KEY_ID = "${aws_access_key_id}" + AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}" + AWS_DEFAULT_REGION = "${aws_default_region}" + OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}" + OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}" + OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}" + ${ envs } + } + resources { + cpu = ${cpu} + memory = ${memory} + } + } + } +}
\ No newline at end of file diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-trending-ndrpdr.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-trending-ndrpdr.hcl.tftpl new file mode 100644 index 0000000000..8cd40f537e --- /dev/null +++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-trending-ndrpdr.hcl.tftpl @@ -0,0 +1,55 @@ +job "${job_name}" { + datacenters = "${datacenters}" + type = "${type}" + periodic { + cron = "${cron}" + prohibit_overlap = "${prohibit_overlap}" + time_zone = "${time_zone}" + } + group "${job_name}" { + restart { + mode = "fail" + } + constraint { + attribute = "$${attr.cpu.arch}" + operator = "!=" + value = "arm64" + } + constraint { + attribute = "$${node.class}" + value = "builder" + } + task "${job_name}" { + artifact { + source = "git::https://github.com/FDio/csit" + destination = "local/csit" + } + driver = "docker" + config { + image = "${image}" + command = "gluesparksubmit" + args = [ + "--driver-memory", "30g", + "--executor-memory", "30g", + "--executor-cores", "2", + "--master", "local[2]", + "trending_ndrpdr.py" + ] + work_dir = "/local/csit/csit.infra.etl" + } + env { + AWS_ACCESS_KEY_ID = "${aws_access_key_id}" + AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}" + AWS_DEFAULT_REGION = "${aws_default_region}" + OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}" + OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}" + OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}" + ${ envs } + } + resources { + cpu = ${cpu} + memory = ${memory} + } + } + } +}
\ No newline at end of file diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-trending-soak.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-trending-soak.hcl.tftpl new file mode 100644 index 0000000000..6d77a898df --- /dev/null +++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl-trending-soak.hcl.tftpl @@ -0,0 +1,55 @@ +job "${job_name}" { + datacenters = "${datacenters}" + type = "${type}" + periodic { + cron = "${cron}" + prohibit_overlap = "${prohibit_overlap}" + time_zone = "${time_zone}" + } + group "${job_name}" { + restart { + mode = "fail" + } + constraint { + attribute = "$${attr.cpu.arch}" + operator = "!=" + value = "arm64" + } + constraint { + attribute = "$${node.class}" + value = "builder" + } + task "${job_name}" { + artifact { + source = "git::https://github.com/FDio/csit" + destination = "local/csit" + } + driver = "docker" + config { + image = "${image}" + command = "gluesparksubmit" + args = [ + "--driver-memory", "30g", + "--executor-memory", "30g", + "--executor-cores", "2", + "--master", "local[2]", + "trending_soak.py" + ] + work_dir = "/local/csit/csit.infra.etl" + } + env { + AWS_ACCESS_KEY_ID = "${aws_access_key_id}" + AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}" + AWS_DEFAULT_REGION = "${aws_default_region}" + OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}" + OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}" + OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}" + ${ envs } + } + resources { + cpu = ${cpu} + memory = ${memory} + } + } + } +} diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/main.tf b/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/main.tf new file mode 100644 index 0000000000..aac81d9b78 --- /dev/null +++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/main.tf @@ -0,0 +1,283 @@ +data "vault_generic_secret" "fdio_logs" { + path = "kv/secret/data/etl/fdio_logs" +} + +data "vault_generic_secret" 
"fdio_docs" { + path = "kv/secret/data/etl/fdio_docs" +} + +module "etl-stats" { + providers = { + nomad = nomad.yul1 + } + source = "../" + + aws_access_key_id = data.vault_generic_secret.fdio_logs.data["access_key"] + aws_secret_access_key = data.vault_generic_secret.fdio_logs.data["secret_key"] + aws_default_region = data.vault_generic_secret.fdio_logs.data["region"] + out_aws_access_key_id = data.vault_generic_secret.fdio_docs.data["access_key"] + out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"] + out_aws_default_region = data.vault_generic_secret.fdio_docs.data["region"] + cron = "0 30 0 * * * *" + datacenters = ["yul1"] + job_name = "etl-stats" +} + +module "etl-trending-hoststack" { + providers = { + nomad = nomad.yul1 + } + source = "../" + + aws_access_key_id = data.vault_generic_secret.fdio_logs.data["access_key"] + aws_secret_access_key = data.vault_generic_secret.fdio_logs.data["secret_key"] + aws_default_region = data.vault_generic_secret.fdio_logs.data["region"] + out_aws_access_key_id = data.vault_generic_secret.fdio_docs.data["access_key"] + out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"] + out_aws_default_region = data.vault_generic_secret.fdio_docs.data["region"] + cron = "0 30 0 * * * *" + datacenters = ["yul1"] + job_name = "etl-trending-hoststack" +} + +module "etl-trending-mrr" { + providers = { + nomad = nomad.yul1 + } + source = "../" + + aws_access_key_id = data.vault_generic_secret.fdio_logs.data["access_key"] + aws_secret_access_key = data.vault_generic_secret.fdio_logs.data["secret_key"] + aws_default_region = data.vault_generic_secret.fdio_logs.data["region"] + out_aws_access_key_id = data.vault_generic_secret.fdio_docs.data["access_key"] + out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"] + out_aws_default_region = data.vault_generic_secret.fdio_docs.data["region"] + cron = "0 30 0 * * * *" + datacenters = ["yul1"] + job_name = 
"etl-trending-mrr" + memory = 60000 +} + +module "etl-trending-ndrpdr" { + providers = { + nomad = nomad.yul1 + } + source = "../" + + aws_access_key_id = data.vault_generic_secret.fdio_logs.data["access_key"] + aws_secret_access_key = data.vault_generic_secret.fdio_logs.data["secret_key"] + aws_default_region = data.vault_generic_secret.fdio_logs.data["region"] + out_aws_access_key_id = data.vault_generic_secret.fdio_docs.data["access_key"] + out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"] + out_aws_default_region = data.vault_generic_secret.fdio_docs.data["region"] + cron = "0 30 0 * * * *" + datacenters = ["yul1"] + job_name = "etl-trending-ndrpdr" + memory = 60000 +} + +module "etl-trending-soak" { + providers = { + nomad = nomad.yul1 + } + source = "../" + + aws_access_key_id = data.vault_generic_secret.fdio_logs.data["access_key"] + aws_secret_access_key = data.vault_generic_secret.fdio_logs.data["secret_key"] + aws_default_region = data.vault_generic_secret.fdio_logs.data["region"] + out_aws_access_key_id = data.vault_generic_secret.fdio_docs.data["access_key"] + out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"] + out_aws_default_region = data.vault_generic_secret.fdio_docs.data["region"] + cron = "0 30 0 * * * *" + datacenters = ["yul1"] + job_name = "etl-trending-soak" + memory = 60000 +} + +#module "etl-iterative-hoststack-rls2402" { +# providers = { +# nomad = nomad.yul1 +# } +# source = "../" +# +# aws_access_key_id = data.vault_generic_secret.fdio_logs.data["access_key"] +# aws_secret_access_key = data.vault_generic_secret.fdio_logs.data["secret_key"] +# aws_default_region = data.vault_generic_secret.fdio_logs.data["region"] +# out_aws_access_key_id = data.vault_generic_secret.fdio_docs.data["access_key"] +# out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"] +# out_aws_default_region = data.vault_generic_secret.fdio_docs.data["region"] +# cron = "0 30 0 
* * * *" +# datacenters = ["yul1"] +# job_name = "etl-iterative-hoststack-rls2402" +#} +# +#module "etl-iterative-mrr-rls2402" { +# providers = { +# nomad = nomad.yul1 +# } +# source = "../" +# +# aws_access_key_id = data.vault_generic_secret.fdio_logs.data["access_key"] +# aws_secret_access_key = data.vault_generic_secret.fdio_logs.data["secret_key"] +# aws_default_region = data.vault_generic_secret.fdio_logs.data["region"] +# out_aws_access_key_id = data.vault_generic_secret.fdio_docs.data["access_key"] +# out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"] +# out_aws_default_region = data.vault_generic_secret.fdio_docs.data["region"] +# cron = "0 30 0 * * * *" +# datacenters = ["yul1"] +# job_name = "etl-iterative-mrr-rls2402" +#} +# +#module "etl-iterative-ndrpdr-rls2402" { +# providers = { +# nomad = nomad.yul1 +# } +# source = "../" +# +# aws_access_key_id = data.vault_generic_secret.fdio_logs.data["access_key"] +# aws_secret_access_key = data.vault_generic_secret.fdio_logs.data["secret_key"] +# aws_default_region = data.vault_generic_secret.fdio_logs.data["region"] +# out_aws_access_key_id = data.vault_generic_secret.fdio_docs.data["access_key"] +# out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"] +# out_aws_default_region = data.vault_generic_secret.fdio_docs.data["region"] +# cron = "0 30 0 * * * *" +# datacenters = ["yul1"] +# job_name = "etl-iterative-ndrpdr-rls2402" +#} +# +#module "etl-iterative-reconf-rls2402" { +# providers = { +# nomad = nomad.yul1 +# } +# source = "../" +# +# aws_access_key_id = data.vault_generic_secret.fdio_logs.data["access_key"] +# aws_secret_access_key = data.vault_generic_secret.fdio_logs.data["secret_key"] +# aws_default_region = data.vault_generic_secret.fdio_logs.data["region"] +# out_aws_access_key_id = data.vault_generic_secret.fdio_docs.data["access_key"] +# out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"] +# 
out_aws_default_region = data.vault_generic_secret.fdio_docs.data["region"] +# cron = "0 30 0 * * * *" +# datacenters = ["yul1"] +# job_name = "etl-iterative-reconf-rls2402" +#} +# +#module "etl-iterative-soak-rls2402" { +# providers = { +# nomad = nomad.yul1 +# } +# source = "../" +# +# aws_access_key_id = data.vault_generic_secret.fdio_logs.data["access_key"] +# aws_secret_access_key = data.vault_generic_secret.fdio_logs.data["secret_key"] +# aws_default_region = data.vault_generic_secret.fdio_logs.data["region"] +# out_aws_access_key_id = data.vault_generic_secret.fdio_docs.data["access_key"] +# out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"] +# out_aws_default_region = data.vault_generic_secret.fdio_docs.data["region"] +# cron = "0 30 0 * * * *" +# datacenters = ["yul1"] +# job_name = "etl-iterative-soak-rls2402" +#} +# +#module "etl-coverage-device-rls2402" { +# providers = { +# nomad = nomad.yul1 +# } +# source = "../" +# +# aws_access_key_id = data.vault_generic_secret.fdio_logs.data["access_key"] +# aws_secret_access_key = data.vault_generic_secret.fdio_logs.data["secret_key"] +# aws_default_region = data.vault_generic_secret.fdio_logs.data["region"] +# out_aws_access_key_id = data.vault_generic_secret.fdio_docs.data["access_key"] +# out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"] +# out_aws_default_region = data.vault_generic_secret.fdio_docs.data["region"] +# cron = "0 30 0 * * * *" +# datacenters = ["yul1"] +# job_name = "etl-coverage-device-rls2402" +#} +# +#module "etl-coverage-hoststack-rls2402" { +# providers = { +# nomad = nomad.yul1 +# } +# source = "../" +# +# aws_access_key_id = data.vault_generic_secret.fdio_logs.data["access_key"] +# aws_secret_access_key = data.vault_generic_secret.fdio_logs.data["secret_key"] +# aws_default_region = data.vault_generic_secret.fdio_logs.data["region"] +# out_aws_access_key_id = data.vault_generic_secret.fdio_docs.data["access_key"] +# 
out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"] +# out_aws_default_region = data.vault_generic_secret.fdio_docs.data["region"] +# cron = "0 30 0 * * * *" +# datacenters = ["yul1"] +# job_name = "etl-coverage-hoststack-rls2402" +#} +# +#module "etl-coverage-mrr-rls2402" { +# providers = { +# nomad = nomad.yul1 +# } +# source = "../" +# +# aws_access_key_id = data.vault_generic_secret.fdio_logs.data["access_key"] +# aws_secret_access_key = data.vault_generic_secret.fdio_logs.data["secret_key"] +# aws_default_region = data.vault_generic_secret.fdio_logs.data["region"] +# out_aws_access_key_id = data.vault_generic_secret.fdio_docs.data["access_key"] +# out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"] +# out_aws_default_region = data.vault_generic_secret.fdio_docs.data["region"] +# cron = "0 30 0 * * * *" +# datacenters = ["yul1"] +# job_name = "etl-coverage-mrr-rls2402" +#} +# +#module "etl-coverage-ndrpdr-rls2402" { +# providers = { +# nomad = nomad.yul1 +# } +# source = "../" +# +# aws_access_key_id = data.vault_generic_secret.fdio_logs.data["access_key"] +# aws_secret_access_key = data.vault_generic_secret.fdio_logs.data["secret_key"] +# aws_default_region = data.vault_generic_secret.fdio_logs.data["region"] +# out_aws_access_key_id = data.vault_generic_secret.fdio_docs.data["access_key"] +# out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"] +# out_aws_default_region = data.vault_generic_secret.fdio_docs.data["region"] +# cron = "0 30 0 * * * *" +# datacenters = ["yul1"] +# job_name = "etl-coverage-ndrpdr-rls2402" +#} +# +#module "etl-coverage-reconf-rls2402" { +# providers = { +# nomad = nomad.yul1 +# } +# source = "../" +# +# aws_access_key_id = data.vault_generic_secret.fdio_logs.data["access_key"] +# aws_secret_access_key = data.vault_generic_secret.fdio_logs.data["secret_key"] +# aws_default_region = data.vault_generic_secret.fdio_logs.data["region"] +# 
out_aws_access_key_id = data.vault_generic_secret.fdio_docs.data["access_key"] +# out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"] +# out_aws_default_region = data.vault_generic_secret.fdio_docs.data["region"] +# cron = "0 30 0 * * * *" +# datacenters = ["yul1"] +# job_name = "etl-coverage-reconf-rls2402" +#} +# +#module "etl-coverage-soak-rls2402" { +# providers = { +# nomad = nomad.yul1 +# } +# source = "../" +# +# aws_access_key_id = data.vault_generic_secret.fdio_logs.data["access_key"] +# aws_secret_access_key = data.vault_generic_secret.fdio_logs.data["secret_key"] +# aws_default_region = data.vault_generic_secret.fdio_logs.data["region"] +# out_aws_access_key_id = data.vault_generic_secret.fdio_docs.data["access_key"] +# out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"] +# out_aws_default_region = data.vault_generic_secret.fdio_docs.data["region"] +# cron = "0 30 0 * * * *" +# datacenters = ["yul1"] +# job_name = "etl-coverage-soak-rls2402" +#} +#
\ No newline at end of file
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/providers.tf b/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/providers.tf
new file mode 100644
index 0000000000..c6617da02b
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/providers.tf
@@ -0,0 +1,16 @@
+# Nomad provider, aliased "yul1"; the ETL modules select it via nomad.yul1.
+provider "nomad" {
+  address = var.nomad_provider_address
+  alias   = "yul1"
+  # TLS client settings are disabled; the default provider address is plain HTTP.
+  # ca_file   = var.nomad_provider_ca_file
+  # cert_file = var.nomad_provider_cert_file
+  # key_file  = var.nomad_provider_key_file
+}
+
+# Vault provider; the ETL modules read their AWS credentials from Vault secrets.
+provider "vault" {
+  address         = var.vault_provider_address
+  skip_tls_verify = var.vault_provider_skip_tls_verify
+  token           = var.vault_provider_token
+}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/variables.tf b/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/variables.tf
new file mode 100644
index 0000000000..db24bdf0fa
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/variables.tf
@@ -0,0 +1,48 @@
+# Connection settings for the Nomad and Vault providers of the FD.io deployment.
+variable "nomad_acl" {
+  description = "Nomad ACLs enabled/disabled."
+  type        = bool
+  default     = false
+}
+
+variable "nomad_provider_address" {
+  description = "FD.io Nomad cluster address."
+  type        = string
+  default     = "http://10.30.51.23:4646"
+}
+
+variable "nomad_provider_ca_file" {
+  description = "A local file path to a PEM-encoded certificate authority."
+  type        = string
+  default     = "/etc/nomad.d/ssl/nomad-ca.pem"
+}
+
+variable "nomad_provider_cert_file" {
+  description = "A local file path to a PEM-encoded certificate."
+  type        = string
+  default     = "/etc/nomad.d/ssl/nomad-cli.pem"
+}
+
+variable "nomad_provider_key_file" {
+  description = "A local file path to a PEM-encoded private key."
+  type        = string
+  default     = "/etc/nomad.d/ssl/nomad-cli-key.pem"
+}
+
+variable "vault_provider_address" {
+  description = "Vault cluster address."
+  type        = string
+  default     = "http://10.30.51.23:8200"
+}
+
+variable "vault_provider_skip_tls_verify" {
+  description = "Skip verification of the Vault server's TLS certificate."
+  type        = bool
+  default     = false
+}
+
+variable "vault_provider_token" {
+  description = "Vault root token."
+  type        = string
+  sensitive   = true
+}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/versions.tf b/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/versions.tf
new file mode 100644
index 0000000000..0c05e76d65
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/fdio/versions.tf
@@ -0,0 +1,20 @@
+# Remote state backend and provider version pins for the FD.io deployment.
+terraform {
+  backend "consul" {
+    address = "10.30.51.23:8500"
+    scheme  = "http"
+    path    = "terraform/etl"
+  }
+  required_providers {
+    nomad = {
+      source  = "hashicorp/nomad"
+      version = ">= 1.4.20"
+    }
+    vault = {
+      # Explicit source address instead of the implicit hashicorp/<name> lookup.
+      source  = "hashicorp/vault"
+      version = ">= 3.12.0"
+    }
+  }
+  required_version = ">= 1.5.4"
+}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/main.tf b/fdio.infra.terraform/terraform-nomad-pyspark-etl/main.tf
new file mode 100644
index 0000000000..cd6a9a52ff
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/main.tf
@@ -0,0 +1,37 @@
+locals {
+  # List inputs flattened to single strings before being passed to the job template.
+  datacenters = join(",", var.datacenters)
+  envs        = join("\n", var.envs)
+}
+
+# Renders conf/nomad/<job_name>.hcl.tftpl with the module inputs and submits the
+# result as a Nomad job.
+resource "nomad_job" "nomad_job" {
+  jobspec = templatefile(
+    "${path.module}/conf/nomad/${var.job_name}.hcl.tftpl",
+    {
+      aws_access_key_id         = var.aws_access_key_id,
+      aws_secret_access_key     = var.aws_secret_access_key,
+      aws_default_region        = var.aws_default_region,
+      cpu                       = var.cpu,
+      cron                      = var.cron,
+      datacenters               = local.datacenters,
+      envs                      = local.envs,
+      image                     = var.image,
+      job_name                  = var.job_name,
+      memory                    = var.memory,
+      out_aws_access_key_id     = var.out_aws_access_key_id,
+      out_aws_secret_access_key = var.out_aws_secret_access_key,
+      out_aws_default_region    = var.out_aws_default_region,
+      prohibit_overlap          = var.prohibit_overlap,
+      time_zone                 = var.time_zone,
+      type                      = var.type,
+      use_vault_provider        = var.vault_secret.use_vault_provider,
+      vault_kv_policy_name      = var.vault_secret.vault_kv_policy_name,
+      vault_kv_path             = var.vault_secret.vault_kv_path,
+      vault_kv_field_access_key = var.vault_secret.vault_kv_field_access_key,
+      vault_kv_field_secret_key = var.vault_secret.vault_kv_field_secret_key
+  })
+  # Per the nomad_job docs, false makes the provider monitor the job after submission.
+  detach = false
+}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/variables.tf b/fdio.infra.terraform/terraform-nomad-pyspark-etl/variables.tf
new file mode 100644
index 0000000000..f6d318e855
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/variables.tf
@@ -0,0 +1,117 @@
+# Nomad
+variable "datacenters" {
+  description = "Specifies the list of DCs to be considered placing this task."
+  type        = list(string)
+  default     = ["dc1"]
+}
+
+# ETL
+variable "aws_access_key_id" {
+  description = "AWS access key."
+  type        = string
+  default     = "aws"
+}
+
+variable "aws_secret_access_key" {
+  description = "AWS secret key"
+  type        = string
+  default     = "aws"
+  sensitive   = true
+}
+
+variable "aws_default_region" {
+  description = "AWS region"
+  type        = string
+  default     = "aws"
+}
+
+variable "cpu" {
+  description = "Specifies the CPU required to run this task in MHz."
+  type        = number
+  default     = 10000
+}
+
+variable "cron" {
+  description = "Specifies a cron expression configuring the interval to launch."
+  type        = string
+  default     = "@daily"
+}
+
+variable "envs" {
+  description = "Specifies ETL environment variables."
+  type        = list(string)
+  default     = []
+}
+
+variable "image" {
+  description = "Specifies the Docker image to run."
+  type        = string
+  default     = "pmikus/docker-ubuntu-focal-aws-glue:latest"
+}
+
+variable "job_name" {
+  description = "Specifies a name for the job."
+  type        = string
+  default     = "etl"
+}
+
+variable "memory" {
+  description = "Specifies the memory required in MB."
+  type        = number
+  default     = 50000
+}
+
+variable "out_aws_access_key_id" {
+  description = "AWS access key."
+  type        = string
+  default     = "aws"
+}
+
+variable "out_aws_secret_access_key" {
+  description = "AWS secret key"
+  type        = string
+  default     = "aws"
+  sensitive   = true
+}
+
+variable "out_aws_default_region" {
+  description = "AWS region"
+  type        = string
+  default     = "aws"
+}
+
+variable "prohibit_overlap" {
+  description = "Specifies if this job should wait until previous completed."
+  type        = bool
+  default     = true
+}
+
+variable "time_zone" {
+  description = "Specifies the time zone to evaluate the next launch interval."
+  type        = string
+  default     = "UTC"
+}
+
+variable "type" {
+  description = "Specifies the Nomad scheduler to use."
+  type        = string
+  default     = "batch"
+}
+
+variable "vault_secret" {
+  type = object({
+    use_vault_provider        = bool,
+    vault_kv_policy_name      = string,
+    vault_kv_path             = string,
+    vault_kv_field_access_key = string,
+    vault_kv_field_secret_key = string
+  })
+  description = "Set of properties to be able to fetch secret from vault."
+  default = {
+    use_vault_provider        = false
+    vault_kv_policy_name      = "kv"
+    vault_kv_path             = "secret/data/etl"
+    vault_kv_field_access_key = "access_key"
+    vault_kv_field_secret_key = "secret_key"
+  }
+}
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/versions.tf b/fdio.infra.terraform/terraform-nomad-pyspark-etl/versions.tf
new file mode 100644
index 0000000000..f40435fe77
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/versions.tf
@@ -0,0 +1,10 @@
+# Minimum Terraform and Nomad provider versions required by this module.
+terraform {
+  required_providers {
+    nomad = {
+      source  = "hashicorp/nomad"
+      version = ">= 1.4.20"
+    }
+  }
+  required_version = ">= 1.5.4"
+}
|