2 лет назад · 6a33778245
--- a/config.tf
+++ b/config.tf
@@ -2,6 +2,9 @@ locals {
 
				   # unique id is used for terraform backend state storage. Duplicates _will_ be a problem.
			
 
				   unique_id = "monkeybox_emr_lab_jupyter"
			
 
				 
			
 
				+  # an ssh key pair that must already exist in EC2
			
 
				+  key_pair = "Fred-IO"
			
 
				+
			
 
				   # Everything here should be self-explanatory
			
 
				   profile = "default"
			
 
				   region  = "us-east-2"
			
--- a/emr.tf
+++ b/emr.tf
@@ -1,17 +1,25 @@
 
				+# Note: Instance Types cannot be 'amd epyc', e.g. m5a.* and r5a.*
			
 
				+# Note: Instance Types cannot be 'graviton', e.g. m6g.*
			
 
				+
			
 
				 resource "aws_emr_cluster" "cluster" {
			
 
				   name          = local.unique_id
			
 
				   release_label = "emr-6.8.0" # aws emr list-release-labels
			
 
				-  applications  = ["Spark", "Hadoop", "JupyterEnterpriseGateway", "JupyterHub"]
			
 
				-  log_uri       = "s3://${aws_s3_bucket.s3_emr.id}/logs/"
			
 
				+  # per https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-managed-notebooks-considerations.html, 
			
 
				+  # these are the 4 applications that are needed
			
 
				+  applications = ["Spark", "Hadoop", "Livy", "JupyterEnterpriseGateway", ]
			
 
				+  # Maybe add: "JupyterHub"
			
 
				+
			
 
				+  log_uri = "s3://${aws_s3_bucket.s3_emr.id}/logs/"
			
 
				 
			
 
				   termination_protection            = false # may want 'true' for production
			
 
				-  keep_job_flow_alive_when_no_steps = false # If true, workers stay active. If false, HDFS is lost.
			
 
				+  keep_job_flow_alive_when_no_steps = true  # If true, cluster sticks around when nothing to do
			
 
				 
			
 
				   ec2_attributes {
			
 
				     subnet_id                         = aws_subnet.main.id
			
 
				     emr_managed_master_security_group = aws_security_group.allow_access.id
			
 
				     emr_managed_slave_security_group  = aws_security_group.allow_access.id
			
 
				     instance_profile                  = aws_iam_instance_profile.emr_profile.arn
			
 
				+    key_name                          = local.key_pair
			
 
				   }
			
 
				 
			
 
				   master_instance_fleet {
			
@@ -155,7 +163,7 @@ resource "aws_emr_cluster" "cluster" {
 
				 resource "aws_emr_managed_scaling_policy" "cluster-asg-policy" {
			
 
				   cluster_id = aws_emr_cluster.cluster.id
			
 
				   compute_limits {
			
 
				-    unit_type                       = "Instances"
			
 
				+    unit_type                       = "InstanceFleetUnits"
			
 
				     minimum_capacity_units          = 1
			
 
				     maximum_capacity_units          = 10
			
 
				     maximum_ondemand_capacity_units = 2
			
--- a/notes.md
+++ b/notes.md
@@ -0,0 +1,8 @@
 
				+Jupyter Notebook and Python versions
			
 
				+
			
 
				+EMR Notebooks runs Jupyter Notebook version 6.0.2
			
 
				+and Python 3.6.5 regardless of the Amazon EMR release version of the attached cluster.
			
 
				+
			
 
				+[source](https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-managed-notebooks-considerations.html)
			
 
				+
			
 
				+