|
@@ -1,17 +1,25 @@
|
|
|
+# Note: Instance Types cannot be 'amd epyc', e.g. m5a.* and r5a.*
|
|
|
+# Note: Instance Types cannot be 'graviton', e.g. m6g.*
|
|
|
+
|
|
|
resource "aws_emr_cluster" "cluster" {
|
|
|
name = local.unique_id
|
|
|
release_label = "emr-6.8.0" # aws emr list-release-labels
|
|
|
- applications = ["Spark", "Hadoop", "JupyterEnterpriseGateway", "JupyterHub"]
|
|
|
- log_uri = "s3://${aws_s3_bucket.s3_emr.id}/logs/"
|
|
|
+ # per https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-managed-notebooks-considerations.html,
|
|
|
+ # these are the 4 applications that are needed
|
|
|
+ applications = ["Spark", "Hadoop", "Livy", "JupyterEnterpriseGateway", ]
|
|
|
+ # Maybe also add: "JupyterHub"
|
|
|
+
|
|
|
+ log_uri = "s3://${aws_s3_bucket.s3_emr.id}/logs/"
|
|
|
|
|
|
termination_protection = false # may want 'true' for production
|
|
|
- keep_job_flow_alive_when_no_steps = false # If true, workers stay active. If false, HDFS is lost.
|
|
|
+ keep_job_flow_alive_when_no_steps = true # If true, cluster sticks around when nothing to do
|
|
|
|
|
|
ec2_attributes {
|
|
|
subnet_id = aws_subnet.main.id
|
|
|
emr_managed_master_security_group = aws_security_group.allow_access.id
|
|
|
emr_managed_slave_security_group = aws_security_group.allow_access.id
|
|
|
instance_profile = aws_iam_instance_profile.emr_profile.arn
|
|
|
+ key_name = local.key_pair
|
|
|
}
|
|
|
|
|
|
master_instance_fleet {
|
|
@@ -155,7 +163,7 @@ resource "aws_emr_cluster" "cluster" {
|
|
|
resource "aws_emr_managed_scaling_policy" "cluster-asg-policy" {
|
|
|
cluster_id = aws_emr_cluster.cluster.id
|
|
|
compute_limits {
|
|
|
- unit_type = "Instances"
|
|
|
+ unit_type = "InstanceFleetUnits"
|
|
|
minimum_capacity_units = 1
|
|
|
maximum_capacity_units = 10
|
|
|
maximum_ondemand_capacity_units = 2
|