aboutsummaryrefslogtreecommitdiffstats
path: root/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf
diff options
context:
space:
mode:
Diffstat (limited to 'fdio.infra.terraform/terraform-nomad-pyspark-etl/conf')
-rw-r--r--fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl.hcl.tftpl318
1 files changed, 318 insertions, 0 deletions
diff --git a/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl.hcl.tftpl b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl.hcl.tftpl
new file mode 100644
index 0000000000..208fb0a59f
--- /dev/null
+++ b/fdio.infra.terraform/terraform-nomad-pyspark-etl/conf/nomad/etl.hcl.tftpl
@@ -0,0 +1,318 @@
+job "${job_name}" {
+ # The "datacenters" parameter specifies the list of datacenters which should
+ # be considered when placing this job. This must be provided.
+ #
+ # Nomad documents this parameter as a list of strings, so the rendered value
+ # is wrapped in brackets instead of being emitted as a bare string that only
+ # works when the parser coerces a string into a one-element list.
+ # NOTE(review): this always yields a single-element list; if the caller can
+ # pass several datacenter names in one value, confirm how they are joined.
+ datacenters = ["${datacenters}"]
+
+ # The "type" parameter controls the type of job, which impacts the scheduler's
+ # decision on placement. For a full list of job types and their differences,
+ # please see the online documentation.
+ #
+ # https://www.nomadproject.io/docs/jobspec/schedulers
+ #
+ type = "${type}"
+
+ # The periodic stanza allows a job to run at fixed times, dates, or intervals.
+ # The easiest way to think about the periodic scheduler is "Nomad cron" or
+ # "distributed cron".
+ #
+ # https://www.nomadproject.io/docs/job-specification/periodic
+ #
+ periodic {
+   # Schedule expression, supplied by the template caller.
+   cron = "${cron}"
+
+   # Time zone setting that accompanies the schedule above.
+   time_zone = "${time_zone}"
+
+   # Rendered as a quoted string; the job parser is relied on to read it as
+   # the boolean overlap switch.
+   prohibit_overlap = "${prohibit_overlap}"
+ }
+
+ # The "group" stanza defines a series of tasks that should be co-located on
+ # the same Nomad client. Any task within a group will be placed on the same
+ # client.
+ #
+ # https://www.nomadproject.io/docs/job-specification/group
+ #
+ group "${job_name}-master" {
+ # The restart stanza configures a task's behavior on task failure. Restarts
+ # happen on the client that is running the task.
+ #
+ # https://www.nomadproject.io/docs/job-specification/restart
+ #
+ restart {
+ # Only "mode" is set; attempts, interval and delay are not specified in this
+ # stanza, so whatever defaults Nomad applies to this job type are in effect.
+ # NOTE(review): per the Nomad restart docs, "fail" stops restarting the task
+ # once the allowed attempts are used up - confirm this is the intent.
+ mode = "fail"
+ }
+
+ # The constraint allows restricting the set of eligible nodes. Constraints
+ # may filter on attributes or client metadata.
+ #
+ # For more information and examples on the "constraint" stanza, please see
+ # the online documentation at:
+ #
+ # https://www.nomadproject.io/docs/job-specification/constraint
+ #
+ constraint {
+ # The doubled "$$" is the template escape: the renderer emits a single "$",
+ # leaving the attr.cpu.arch interpolation for Nomad to resolve per node.
+ attribute = "$${attr.cpu.arch}"
+ # Together with the value below: reject nodes whose CPU architecture
+ # attribute equals "arm64".
+ operator = "!="
+ value = "arm64"
+ }
+
+ constraint {
+ # No operator is given, so Nomad's default comparison applies.
+ # NOTE(review): the constraint docs list equality as the default, which
+ # would restrict placement to nodes of class "builder" - confirm.
+ # The doubled "$$" keeps the node.class interpolation for Nomad rather than
+ # the template renderer.
+ attribute = "$${node.class}"
+ value = "builder"
+ }
+
+ # The "task" stanza creates an individual unit of work, such as a Docker
+ # container, web application, or batch processing.
+ #
+ # https://www.nomadproject.io/docs/job-specification/task.html
+ #
+ task "${job_name}-trending" {
+   # Task driver that runs this workload.
+   driver = "docker"
+
+   # Fetch and unpack the CSIT git repository into local/csit.
+   #
+   # https://www.nomadproject.io/docs/job-specification/artifact
+   #
+   artifact {
+     destination = "local/csit"
+     source      = "git::https://github.com/FDio/csit"
+   }
+
+   # Driver configuration, passed to the docker driver as-is: run
+   # gluesparksubmit on trending.py with 20g for both the Spark driver and
+   # executor. The working directory is presumably the ETL folder of the
+   # checkout fetched by the artifact stanza.
+   config {
+     image    = "${image}"
+     work_dir = "/local/csit/csit.infra.etl"
+     command  = "gluesparksubmit"
+     args = [
+       "--driver-memory",
+       "20g",
+       "--executor-memory",
+       "20g",
+       "trending.py"
+     ]
+   }
+
+   # Environment variables for the task: one set of AWS credentials and
+   # region, a second OUT_-prefixed set, then any extra lines rendered from
+   # the "envs" template variable.
+   env {
+     AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
+     AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
+     AWS_DEFAULT_REGION        = "${aws_default_region}"
+     OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
+     OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+     OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
+     ${ envs }
+   }
+
+   # CPU and memory requested for the task, both supplied by the template
+   # caller.
+   #
+   # https://www.nomadproject.io/docs/job-specification/resources
+   #
+   resources {
+     memory = ${memory}
+     cpu    = ${cpu}
+   }
+ }
+ task "${job_name}-stats" {
+   # Task driver that runs this workload.
+   driver = "docker"
+
+   # Fetch and unpack the CSIT git repository into local/csit.
+   #
+   # https://www.nomadproject.io/docs/job-specification/artifact
+   #
+   artifact {
+     destination = "local/csit"
+     source      = "git::https://github.com/FDio/csit"
+   }
+
+   # Driver configuration, passed to the docker driver as-is: run
+   # gluesparksubmit on stats.py with 10g for both the Spark driver and
+   # executor. The working directory is presumably the ETL folder of the
+   # checkout fetched by the artifact stanza.
+   config {
+     image    = "${image}"
+     work_dir = "/local/csit/csit.infra.etl"
+     command  = "gluesparksubmit"
+     args = [
+       "--driver-memory",
+       "10g",
+       "--executor-memory",
+       "10g",
+       "stats.py"
+     ]
+   }
+
+   # Environment variables for the task: one set of AWS credentials and
+   # region, a second OUT_-prefixed set, then any extra lines rendered from
+   # the "envs" template variable.
+   env {
+     AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
+     AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
+     AWS_DEFAULT_REGION        = "${aws_default_region}"
+     OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
+     OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+     OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
+     ${ envs }
+   }
+
+   # CPU and memory requested for the task, both supplied by the template
+   # caller.
+   #
+   # https://www.nomadproject.io/docs/job-specification/resources
+   #
+   resources {
+     memory = ${memory}
+     cpu    = ${cpu}
+   }
+ }
+ }
+ group "${job_name}-rls2202" {
+ # The restart stanza configures a task's behavior on task failure. Restarts
+ # happen on the client that is running the task.
+ #
+ # https://www.nomadproject.io/docs/job-specification/restart
+ #
+ restart {
+ # Only "mode" is set; attempts, interval and delay are not specified in this
+ # stanza, so whatever defaults Nomad applies to this job type are in effect.
+ # NOTE(review): per the Nomad restart docs, "fail" stops restarting the task
+ # once the allowed attempts are used up - confirm this is the intent.
+ mode = "fail"
+ }
+
+ # The constraint allows restricting the set of eligible nodes. Constraints
+ # may filter on attributes or client metadata.
+ #
+ # For more information and examples on the "constraint" stanza, please see
+ # the online documentation at:
+ #
+ # https://www.nomadproject.io/docs/job-specification/constraint
+ #
+ constraint {
+ # The doubled "$$" is the template escape: the renderer emits a single "$",
+ # leaving the attr.cpu.arch interpolation for Nomad to resolve per node.
+ attribute = "$${attr.cpu.arch}"
+ # Together with the value below: reject nodes whose CPU architecture
+ # attribute equals "arm64".
+ operator = "!="
+ value = "arm64"
+ }
+
+ constraint {
+ # No operator is given, so Nomad's default comparison applies.
+ # NOTE(review): the constraint docs list equality as the default, which
+ # would restrict placement to nodes of class "builder" - confirm.
+ # The doubled "$$" keeps the node.class interpolation for Nomad rather than
+ # the template renderer.
+ attribute = "$${node.class}"
+ value = "builder"
+ }
+
+ # The "task" stanza creates an individual unit of work, such as a Docker
+ # container, web application, or batch processing.
+ #
+ # https://www.nomadproject.io/docs/job-specification/task.html
+ #
+ task "${job_name}-coverage" {
+   # Task driver that runs this workload.
+   driver = "docker"
+
+   # Fetch and unpack the CSIT git repository into local/csit.
+   #
+   # https://www.nomadproject.io/docs/job-specification/artifact
+   #
+   artifact {
+     destination = "local/csit"
+     source      = "git::https://github.com/FDio/csit"
+   }
+
+   # Driver configuration, passed to the docker driver as-is: run
+   # gluesparksubmit on coverage_rls2202.py with 20g for both the Spark
+   # driver and executor. The working directory is presumably the ETL folder
+   # of the checkout fetched by the artifact stanza.
+   config {
+     image    = "${image}"
+     work_dir = "/local/csit/csit.infra.etl"
+     command  = "gluesparksubmit"
+     args = [
+       "--driver-memory",
+       "20g",
+       "--executor-memory",
+       "20g",
+       "coverage_rls2202.py"
+     ]
+   }
+
+   # Environment variables for the task: one set of AWS credentials and
+   # region, a second OUT_-prefixed set, then any extra lines rendered from
+   # the "envs" template variable.
+   env {
+     AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
+     AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
+     AWS_DEFAULT_REGION        = "${aws_default_region}"
+     OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
+     OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+     OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
+     ${ envs }
+   }
+
+   # CPU and memory requested for the task, both supplied by the template
+   # caller.
+   #
+   # https://www.nomadproject.io/docs/job-specification/resources
+   #
+   resources {
+     memory = ${memory}
+     cpu    = ${cpu}
+   }
+ }
+ task "${job_name}-iterative" {
+   # Task driver that runs this workload.
+   driver = "docker"
+
+   # Fetch and unpack the CSIT git repository into local/csit.
+   #
+   # https://www.nomadproject.io/docs/job-specification/artifact
+   #
+   artifact {
+     destination = "local/csit"
+     source      = "git::https://github.com/FDio/csit"
+   }
+
+   # Driver configuration, passed to the docker driver as-is: run
+   # gluesparksubmit on iterative_rls2202.py with 20g for both the Spark
+   # driver and executor. The working directory is presumably the ETL folder
+   # of the checkout fetched by the artifact stanza.
+   config {
+     image    = "${image}"
+     work_dir = "/local/csit/csit.infra.etl"
+     command  = "gluesparksubmit"
+     args = [
+       "--driver-memory",
+       "20g",
+       "--executor-memory",
+       "20g",
+       "iterative_rls2202.py"
+     ]
+   }
+
+   # Environment variables for the task: one set of AWS credentials and
+   # region, a second OUT_-prefixed set, then any extra lines rendered from
+   # the "envs" template variable.
+   env {
+     AWS_ACCESS_KEY_ID         = "${aws_access_key_id}"
+     AWS_SECRET_ACCESS_KEY     = "${aws_secret_access_key}"
+     AWS_DEFAULT_REGION        = "${aws_default_region}"
+     OUT_AWS_ACCESS_KEY_ID     = "${out_aws_access_key_id}"
+     OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
+     OUT_AWS_DEFAULT_REGION    = "${out_aws_default_region}"
+     ${ envs }
+   }
+
+   # CPU and memory requested for the task, both supplied by the template
+   # caller.
+   #
+   # https://www.nomadproject.io/docs/job-specification/resources
+   #
+   resources {
+     memory = ${memory}
+     cpu    = ${cpu}
+   }
+ }
+ }
+}