|
@@ -0,0 +1,347 @@
|
|
|
+#resource "aws_emr_cluster" "cluster" {
|
|
|
+# name = "monkeybox-emr-lab"
|
|
|
+# release_label = "emr-5.36.0"
|
|
|
+# applications = ["Spark"]
|
|
|
+# log_uri = "s3://monkeybox-emr-test/logs"
|
|
|
+#
|
|
|
+# ec2_attributes {
|
|
|
+# subnet_id = aws_subnet.main.id
|
|
|
+# emr_managed_master_security_group = aws_security_group.allow_access.id
|
|
|
+# emr_managed_slave_security_group = aws_security_group.allow_access.id
|
|
|
+# instance_profile = aws_iam_instance_profile.emr_profile.arn
|
|
|
+# }
|
|
|
+#
|
|
|
+# master_instance_group {
|
|
|
+# name = "monkeybox-emr-lab-master"
|
|
|
+# bid_price = "0.07" # Spot bid in USD; don't set this above the instance type's hourly on-demand cost!
|
|
|
+# instance_type = "m5.xlarge"
|
|
|
+# }
|
|
|
+#
|
|
|
+# core_instance_group {
|
|
|
+# name = "monkeybox-emr-lab-core"
|
|
|
+# bid_price = "0.07" # Spot bid in USD; don't set this above the instance type's hourly on-demand cost!
|
|
|
+# instance_count = 2
|
|
|
+# instance_type = "m5.xlarge"
|
|
|
+# }
|
|
|
+#
|
|
|
+# tags = {
|
|
|
+# project = "monkeybox_emr_lab"
|
|
|
+# }
|
|
|
+#
|
|
|
+# bootstrap_action {
|
|
|
+# path = "s3://us-east-2.elasticmapreduce/bootstrap-actions/run-if"
|
|
|
+# name = "runif"
|
|
|
+# args = ["instance.isMaster=true", "echo running on master node"]
|
|
|
+# }
|
|
|
+#
|
|
|
+# auto_termination_policy {
|
|
|
+# idle_timeout = 60
|
|
|
+# }
|
|
|
+#
|
|
|
+# step {
|
|
|
+# name = "Install modules"
|
|
|
+# action_on_failure = "CONTINUE"
|
|
|
+#
|
|
|
+# hadoop_jar_step {
|
|
|
+# jar = "command-runner.jar"
|
|
|
+# args = ["sudo", "python3", "-m", "pip", "install", "numpy", "matplotlib", "pandas", "seaborn", "pyspark"]
|
|
|
+# }
|
|
|
+# }
|
|
|
+#
|
|
|
+# step {
|
|
|
+# name = "Copy script file from s3."
|
|
|
+# action_on_failure = "CONTINUE"
|
|
|
+#
|
|
|
+# hadoop_jar_step {
|
|
|
+# jar = "command-runner.jar"
|
|
|
+# args = ["aws", "s3", "cp", "s3://monkeybox-emr-test/health_violations.py", "/home/hadoop/"]
|
|
|
+# }
|
|
|
+# }
|
|
|
+#
|
|
|
+# step {
|
|
|
+# name = "Pyspark Job."
|
|
|
+# action_on_failure = "CONTINUE"
|
|
|
+#
|
|
|
+# hadoop_jar_step {
|
|
|
+# jar = "command-runner.jar"
|
|
|
+# args = ["sudo", "python3", "/home/hadoop/health_violations.py", "--data_source", "s3://monkeybox-emr-test/food_establishment_data.csv", "--output_uri", "s3://monkeybox-emr-test/output/"]
|
|
|
+# }
|
|
|
+# }
|
|
|
+#
|
|
|
+# # configurations_json = <<EOF
|
|
|
+# #[
|
|
|
+# # {
|
|
|
+# # "Classification": "hadoop-env",
|
|
|
+# # "Configurations": [
|
|
|
+# # {
|
|
|
+# # "Classification": "export",
|
|
|
+# # "Properties": {
|
|
|
+# # "JAVA_HOME": "/usr/lib/jvm/java-1.8.0"
|
|
|
+# # }
|
|
|
+# # }
|
|
|
+# # ],
|
|
|
+# # "Properties": {}
|
|
|
+# # },
|
|
|
+# # {
|
|
|
+# # "Classification": "spark-env",
|
|
|
+# # "Configurations": [
|
|
|
+# # {
|
|
|
+# # "Classification": "export",
|
|
|
+# # "Properties": {
|
|
|
+# # "JAVA_HOME": "/usr/lib/jvm/java-1.8.0"
|
|
|
+# # }
|
|
|
+# # }
|
|
|
+# # ],
|
|
|
+# # "Properties": {}
|
|
|
+# # }
|
|
|
+# #]
|
|
|
+# #EOF
|
|
|
+#
|
|
|
+# service_role = aws_iam_role.iam_emr_service_role.arn
|
|
|
+#}
|
|
|
+#
|
|
|
+#resource "aws_security_group" "allow_access" {
|
|
|
+# name = "allow_access"
|
|
|
+# description = "Allow inbound traffic"
|
|
|
+# vpc_id = aws_vpc.main.id
|
|
|
+#
|
|
|
+# ingress {
|
|
|
+# from_port = 0
|
|
|
+# to_port = 0
|
|
|
+# protocol = "-1"
|
|
|
+# cidr_blocks = [aws_vpc.main.cidr_block]
|
|
|
+# }
|
|
|
+#
|
|
|
+# egress {
|
|
|
+# from_port = 0
|
|
|
+# to_port = 0
|
|
|
+# protocol = "-1"
|
|
|
+# cidr_blocks = ["0.0.0.0/0"]
|
|
|
+# }
|
|
|
+#
|
|
|
+# depends_on = [aws_subnet.main]
|
|
|
+#
|
|
|
+# lifecycle {
|
|
|
+# ignore_changes = [
|
|
|
+# ingress,
|
|
|
+# egress,
|
|
|
+# ]
|
|
|
+# }
|
|
|
+#
|
|
|
+# tags = {
|
|
|
+# project = "monkeybox_emr_lab"
|
|
|
+# }
|
|
|
+#}
|
|
|
+#
|
|
|
+#resource "aws_vpc" "main" {
|
|
|
+# cidr_block = "172.16.0.0/16"
|
|
|
+# enable_dns_hostnames = true
|
|
|
+#
|
|
|
+# tags = {
|
|
|
+# name = "monkeybox_emr_lab"
|
|
|
+# project = "monkeybox_emr_lab"
|
|
|
+# }
|
|
|
+#}
|
|
|
+#
|
|
|
+#resource "aws_vpc_endpoint_route_table_association" "example" {
|
|
|
+# route_table_id = aws_route_table.r.id
|
|
|
+# vpc_endpoint_id = aws_vpc_endpoint.s3.id
|
|
|
+#}
|
|
|
+#
|
|
|
+#resource "aws_subnet" "main" {
|
|
|
+# vpc_id = aws_vpc.main.id
|
|
|
+# cidr_block = "172.16.0.0/20"
|
|
|
+# map_public_ip_on_launch = true
|
|
|
+#
|
|
|
+# tags = {
|
|
|
+# name = "monkeybox_emr_lab"
|
|
|
+# project = "monkeybox_emr_lab"
|
|
|
+# }
|
|
|
+#}
|
|
|
+#
|
|
|
+#resource "aws_internet_gateway" "gw" {
|
|
|
+# vpc_id = aws_vpc.main.id
|
|
|
+#}
|
|
|
+#
|
|
|
+#resource "aws_vpc_endpoint" "s3" {
|
|
|
+# vpc_id = aws_vpc.main.id
|
|
|
+# service_name = "com.amazonaws.us-east-2.s3"
|
|
|
+#
|
|
|
+# tags = {
|
|
|
+# project = "monkeybox_emr_lab"
|
|
|
+# }
|
|
|
+#}
|
|
|
+#
|
|
|
+#resource "aws_route_table" "r" {
|
|
|
+# vpc_id = aws_vpc.main.id
|
|
|
+#
|
|
|
+# route {
|
|
|
+# cidr_block = "0.0.0.0/0"
|
|
|
+# gateway_id = aws_internet_gateway.gw.id
|
|
|
+# }
|
|
|
+#}
|
|
|
+#
|
|
|
+#resource "aws_main_route_table_association" "a" {
|
|
|
+# vpc_id = aws_vpc.main.id
|
|
|
+# route_table_id = aws_route_table.r.id
|
|
|
+#}
|
|
|
+#
|
|
|
+####
|
|
|
+## IAM role setup
|
|
|
+####
|
|
|
+#
|
|
|
+## IAM role assumed by the EMR service (elasticmapreduce.amazonaws.com)
|
|
|
+#resource "aws_iam_role" "iam_emr_service_role" {
|
|
|
+# name = "iam_emr_service_role"
|
|
|
+#
|
|
|
+# assume_role_policy = <<EOF
|
|
|
+#{
|
|
|
+# "Version": "2008-10-17",
|
|
|
+# "Statement": [
|
|
|
+# {
|
|
|
+# "Sid": "",
|
|
|
+# "Effect": "Allow",
|
|
|
+# "Principal": {
|
|
|
+# "Service": "elasticmapreduce.amazonaws.com"
|
|
|
+# },
|
|
|
+# "Action": "sts:AssumeRole"
|
|
|
+# }
|
|
|
+# ]
|
|
|
+#}
|
|
|
+#EOF
|
|
|
+#}
|
|
|
+#
|
|
|
+#data "aws_iam_policy_document" "iam_emr_service_policy" {
|
|
|
+# statement {
|
|
|
+# sid = ""
|
|
|
+# effect = "Allow"
|
|
|
+# resources = ["*"]
|
|
|
+#
|
|
|
+# actions = [
|
|
|
+# "ec2:AuthorizeSecurityGroupEgress",
|
|
|
+# "ec2:AuthorizeSecurityGroupIngress",
|
|
|
+# "ec2:CancelSpotInstanceRequests",
|
|
|
+# "ec2:CreateNetworkInterface",
|
|
|
+# "ec2:CreateSecurityGroup",
|
|
|
+# "ec2:CreateTags",
|
|
|
+# "ec2:DeleteNetworkInterface",
|
|
|
+# "ec2:DeleteSecurityGroup",
|
|
|
+# "ec2:DeleteTags",
|
|
|
+# "ec2:DescribeAvailabilityZones",
|
|
|
+# "ec2:DescribeAccountAttributes",
|
|
|
+# "ec2:DescribeDhcpOptions",
|
|
|
+# "ec2:DescribeInstanceStatus",
|
|
|
+# "ec2:DescribeInstances",
|
|
|
+# "ec2:DescribeKeyPairs",
|
|
|
+# "ec2:DescribeNetworkAcls",
|
|
|
+# "ec2:DescribeNetworkInterfaces",
|
|
|
+# "ec2:DescribePrefixLists",
|
|
|
+# "ec2:DescribeRouteTables",
|
|
|
+# "ec2:DescribeSecurityGroups",
|
|
|
+# "ec2:DescribeSpotInstanceRequests",
|
|
|
+# "ec2:DescribeSpotPriceHistory",
|
|
|
+# "ec2:DescribeSubnets",
|
|
|
+# "ec2:DescribeVpcAttribute",
|
|
|
+# "ec2:DescribeVpcEndpoints",
|
|
|
+# "ec2:DescribeVpcEndpointServices",
|
|
|
+# "ec2:DescribeVpcs",
|
|
|
+# "ec2:DetachNetworkInterface",
|
|
|
+# "ec2:ModifyImageAttribute",
|
|
|
+# "ec2:ModifyInstanceAttribute",
|
|
|
+# "ec2:RequestSpotInstances",
|
|
|
+# "ec2:RevokeSecurityGroupEgress",
|
|
|
+# "ec2:RunInstances",
|
|
|
+# "ec2:TerminateInstances",
|
|
|
+# "ec2:DeleteVolume",
|
|
|
+# "ec2:DescribeVolumeStatus",
|
|
|
+# "ec2:DescribeVolumes",
|
|
|
+# "ec2:DetachVolume",
|
|
|
+# "iam:GetRole",
|
|
|
+# "iam:GetRolePolicy",
|
|
|
+# "iam:ListInstanceProfiles",
|
|
|
+# "iam:ListRolePolicies",
|
|
|
+# "iam:PassRole",
|
|
|
+# "s3:*",
|
|
|
+# "sdb:BatchPutAttributes",
|
|
|
+# "sdb:Select",
|
|
|
+# "sqs:CreateQueue",
|
|
|
+# "sqs:Delete*",
|
|
|
+# "sqs:GetQueue*",
|
|
|
+# "sqs:PurgeQueue",
|
|
|
+# "sqs:ReceiveMessage",
|
|
|
+# ]
|
|
|
+# }
|
|
|
+#}
|
|
|
+#
|
|
|
+#resource "aws_iam_role_policy" "iam_emr_service_policy" {
|
|
|
+# name = "iam_emr_service_policy"
|
|
|
+# role = aws_iam_role.iam_emr_service_role.id
|
|
|
+#
|
|
|
+# policy = data.aws_iam_policy_document.iam_emr_service_policy.json
|
|
|
+#}
|
|
|
+#
|
|
|
+## IAM role for the EC2 instance profile (assumed by ec2.amazonaws.com)
|
|
|
+#resource "aws_iam_role" "iam_emr_profile_role" {
|
|
|
+# name = "iam_emr_profile_role"
|
|
|
+#
|
|
|
+# assume_role_policy = <<EOF
|
|
|
+#{
|
|
|
+# "Version": "2008-10-17",
|
|
|
+# "Statement": [
|
|
|
+# {
|
|
|
+# "Sid": "",
|
|
|
+# "Effect": "Allow",
|
|
|
+# "Principal": {
|
|
|
+# "Service": "ec2.amazonaws.com"
|
|
|
+# },
|
|
|
+# "Action": "sts:AssumeRole"
|
|
|
+# }
|
|
|
+# ]
|
|
|
+#}
|
|
|
+#EOF
|
|
|
+#}
|
|
|
+#
|
|
|
+#resource "aws_iam_instance_profile" "emr_profile" {
|
|
|
+# name = "emr_profile"
|
|
|
+# role = aws_iam_role.iam_emr_profile_role.name
|
|
|
+#}
|
|
|
+#
|
|
|
+#data "aws_iam_policy_document" "iam_emr_profile_policy" {
|
|
|
+# statement {
|
|
|
+# sid = ""
|
|
|
+# effect = "Allow"
|
|
|
+# resources = ["*"]
|
|
|
+#
|
|
|
+# actions = [
|
|
|
+# "cloudwatch:*",
|
|
|
+# "dynamodb:*",
|
|
|
+# "ec2:Describe*",
|
|
|
+# "elasticmapreduce:Describe*",
|
|
|
+# "elasticmapreduce:ListBootstrapActions",
|
|
|
+# "elasticmapreduce:ListClusters",
|
|
|
+# "elasticmapreduce:ListInstanceGroups",
|
|
|
+# "elasticmapreduce:ListInstances",
|
|
|
+# "elasticmapreduce:ListSteps",
|
|
|
+# "kinesis:CreateStream",
|
|
|
+# "kinesis:DeleteStream",
|
|
|
+# "kinesis:DescribeStream",
|
|
|
+# "kinesis:GetRecords",
|
|
|
+# "kinesis:GetShardIterator",
|
|
|
+# "kinesis:MergeShards",
|
|
|
+# "kinesis:PutRecord",
|
|
|
+# "kinesis:SplitShard",
|
|
|
+# "rds:Describe*",
|
|
|
+# "s3:*",
|
|
|
+# "sdb:*",
|
|
|
+# "sns:*",
|
|
|
+# "sqs:*",
|
|
|
+# ]
|
|
|
+# }
|
|
|
+#}
|
|
|
+#
|
|
|
+#resource "aws_iam_role_policy" "iam_emr_profile_policy" {
|
|
|
+# name = "iam_emr_profile_policy"
|
|
|
+# role = aws_iam_role.iam_emr_profile_role.id
|
|
|
+#
|
|
|
+# policy = data.aws_iam_policy_document.iam_emr_profile_policy.json
|
|
|
+#}
|