123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347 |
- #resource "aws_emr_cluster" "cluster" {
- # name = "monkeybox-emr-lab"
- # release_label = "emr-5.36.0"
- # applications = ["Spark"]
- # log_uri = "s3://monkeybox-emr-test/logs"
- #
- # ec2_attributes {
- # subnet_id = aws_subnet.main.id
- # emr_managed_master_security_group = aws_security_group.allow_access.id
- # emr_managed_slave_security_group = aws_security_group.allow_access.id
- # instance_profile = aws_iam_instance_profile.emr_profile.arn
- # }
- #
- # master_instance_group {
- # name = "monkeybox-emr-lab-master"
- # bid_price = "0.07" # Spot bid price in USD; don't set this above the instance type's on-demand hourly cost!
- # instance_type = "m5.xlarge"
- # }
- #
- # core_instance_group {
- # name = "monkeybox-emr-lab-core"
- # bid_price = "0.07" # Spot bid price in USD; don't set this above the instance type's on-demand hourly cost!
- # instance_count = 2
- # instance_type = "m5.xlarge"
- # }
- #
- # tags = {
- # project = "monkeybox_emr_lab"
- # }
- #
- # bootstrap_action {
- # path = "s3://us-east-2.elasticmapreduce/bootstrap-actions/run-if"
- # name = "runif"
- # args = ["instance.isMaster=true", "echo running on master node"]
- # }
- #
- # auto_termination_policy {
- # idle_timeout = 60
- # }
- #
- # step {
- # name = "Install modules"
- # action_on_failure = "CONTINUE"
- #
- # hadoop_jar_step {
- # jar = "command-runner.jar"
- # args = ["sudo", "python3", "-m", "pip", "install", "numpy", "matplotlib", "pandas", "seaborn", "pyspark"]
- # }
- # }
- #
- # step {
- # name = "Copy script file from s3."
- # action_on_failure = "CONTINUE"
- #
- # hadoop_jar_step {
- # jar = "command-runner.jar"
- # args = ["aws", "s3", "cp", "s3://monkeybox-emr-test/health_violations.py", "/home/hadoop/"]
- # }
- # }
- #
- # step {
- # name = "Pyspark Job."
- # action_on_failure = "CONTINUE"
- #
- # hadoop_jar_step {
- # jar = "command-runner.jar"
- # args = ["sudo", "python3", "/home/hadoop/health_violations.py", "--data_source", "s3://monkeybox-emr-test/food_establishment_data.csv", "--output_uri", "s3://monkeybox-emr-test/output/"]
- # }
- # }
- #
- # # configurations_json = <<EOF
- # #[
- # # {
- # # "Classification": "hadoop-env",
- # # "Configurations": [
- # # {
- # # "Classification": "export",
- # # "Properties": {
- # # "JAVA_HOME": "/usr/lib/jvm/java-1.8.0"
- # # }
- # # }
- # # ],
- # # "Properties": {}
- # # },
- # # {
- # # "Classification": "spark-env",
- # # "Configurations": [
- # # {
- # # "Classification": "export",
- # # "Properties": {
- # # "JAVA_HOME": "/usr/lib/jvm/java-1.8.0"
- # # }
- # # }
- # # ],
- # # "Properties": {}
- # # }
- # #]
- # #EOF
- #
- # service_role = aws_iam_role.iam_emr_service_role.arn
- #}
- #
- #resource "aws_security_group" "allow_access" {
- # name = "allow_access"
- # description = "Allow inbound traffic"
- # vpc_id = aws_vpc.main.id
- #
- # ingress {
- # from_port = 0
- # to_port = 0
- # protocol = "-1"
- # cidr_blocks = [aws_vpc.main.cidr_block]
- # }
- #
- # egress {
- # from_port = 0
- # to_port = 0
- # protocol = "-1"
- # cidr_blocks = ["0.0.0.0/0"]
- # }
- #
- # depends_on = [aws_subnet.main]
- #
- # lifecycle {
- # ignore_changes = [
- # ingress,
- # egress,
- # ]
- # }
- #
- # tags = {
- # project = "monkeybox_emr_lab"
- # }
- #}
- #
- #resource "aws_vpc" "main" {
- # cidr_block = "172.16.0.0/16"
- # enable_dns_hostnames = true
- #
- # tags = {
- # name = "monkeybox_emr_lab"
- # project = "monkeybox_emr_lab"
- # }
- #}
- #
- #resource "aws_vpc_endpoint_route_table_association" "example" {
- # route_table_id = aws_route_table.r.id
- # vpc_endpoint_id = aws_vpc_endpoint.s3.id
- #}
- #
- #resource "aws_subnet" "main" {
- # vpc_id = aws_vpc.main.id
- # cidr_block = "172.16.0.0/20"
- # map_public_ip_on_launch = true
- #
- # tags = {
- # name = "monkeybox_emr_lab"
- # project = "monkeybox_emr_lab"
- # }
- #}
- #
- #resource "aws_internet_gateway" "gw" {
- # vpc_id = aws_vpc.main.id
- #}
- #
- #resource "aws_vpc_endpoint" "s3" {
- # vpc_id = aws_vpc.main.id
- # service_name = "com.amazonaws.us-east-2.s3"
- #
- # tags = {
- # project = "monkeybox_emr_lab"
- # }
- #}
- #
- #resource "aws_route_table" "r" {
- # vpc_id = aws_vpc.main.id
- #
- # route {
- # cidr_block = "0.0.0.0/0"
- # gateway_id = aws_internet_gateway.gw.id
- # }
- #}
- #
- #resource "aws_main_route_table_association" "a" {
- # vpc_id = aws_vpc.main.id
- # route_table_id = aws_route_table.r.id
- #}
- #
- ####
- ## IAM role setup
- ####
- #
- ## IAM role for EMR Service
- #resource "aws_iam_role" "iam_emr_service_role" {
- # name = "iam_emr_service_role"
- #
- # assume_role_policy = <<EOF
- #{
- # "Version": "2008-10-17",
- # "Statement": [
- # {
- # "Sid": "",
- # "Effect": "Allow",
- # "Principal": {
- # "Service": "elasticmapreduce.amazonaws.com"
- # },
- # "Action": "sts:AssumeRole"
- # }
- # ]
- #}
- #EOF
- #}
- #
- #data "aws_iam_policy_document" "iam_emr_service_policy" {
- # statement {
- # sid = ""
- # effect = "Allow"
- # resources = ["*"]
- #
- # actions = [
- # "ec2:AuthorizeSecurityGroupEgress",
- # "ec2:AuthorizeSecurityGroupIngress",
- # "ec2:CancelSpotInstanceRequests",
- # "ec2:CreateNetworkInterface",
- # "ec2:CreateSecurityGroup",
- # "ec2:CreateTags",
- # "ec2:DeleteNetworkInterface",
- # "ec2:DeleteSecurityGroup",
- # "ec2:DeleteTags",
- # "ec2:DescribeAvailabilityZones",
- # "ec2:DescribeAccountAttributes",
- # "ec2:DescribeDhcpOptions",
- # "ec2:DescribeInstanceStatus",
- # "ec2:DescribeInstances",
- # "ec2:DescribeKeyPairs",
- # "ec2:DescribeNetworkAcls",
- # "ec2:DescribeNetworkInterfaces",
- # "ec2:DescribePrefixLists",
- # "ec2:DescribeRouteTables",
- # "ec2:DescribeSecurityGroups",
- # "ec2:DescribeSpotInstanceRequests",
- # "ec2:DescribeSpotPriceHistory",
- # "ec2:DescribeSubnets",
- # "ec2:DescribeVpcAttribute",
- # "ec2:DescribeVpcEndpoints",
- # "ec2:DescribeVpcEndpointServices",
- # "ec2:DescribeVpcs",
- # "ec2:DetachNetworkInterface",
- # "ec2:ModifyImageAttribute",
- # "ec2:ModifyInstanceAttribute",
- # "ec2:RequestSpotInstances",
- # "ec2:RevokeSecurityGroupEgress",
- # "ec2:RunInstances",
- # "ec2:TerminateInstances",
- # "ec2:DeleteVolume",
- # "ec2:DescribeVolumeStatus",
- # "ec2:DescribeVolumes",
- # "ec2:DetachVolume",
- # "iam:GetRole",
- # "iam:GetRolePolicy",
- # "iam:ListInstanceProfiles",
- # "iam:ListRolePolicies",
- # "iam:PassRole",
- # "s3:*",
- # "sdb:BatchPutAttributes",
- # "sdb:Select",
- # "sqs:CreateQueue",
- # "sqs:Delete*",
- # "sqs:GetQueue*",
- # "sqs:PurgeQueue",
- # "sqs:ReceiveMessage",
- # ]
- # }
- #}
- #
- #resource "aws_iam_role_policy" "iam_emr_service_policy" {
- # name = "iam_emr_service_policy"
- # role = aws_iam_role.iam_emr_service_role.id
- #
- # policy = data.aws_iam_policy_document.iam_emr_service_policy.json
- #}
- #
- ## IAM Role for EC2 Instance Profile
- #resource "aws_iam_role" "iam_emr_profile_role" {
- # name = "iam_emr_profile_role"
- #
- # assume_role_policy = <<EOF
- #{
- # "Version": "2008-10-17",
- # "Statement": [
- # {
- # "Sid": "",
- # "Effect": "Allow",
- # "Principal": {
- # "Service": "ec2.amazonaws.com"
- # },
- # "Action": "sts:AssumeRole"
- # }
- # ]
- #}
- #EOF
- #}
- #
- #resource "aws_iam_instance_profile" "emr_profile" {
- # name = "emr_profile"
- # role = aws_iam_role.iam_emr_profile_role.name
- #}
- #
- #data "aws_iam_policy_document" "iam_emr_profile_policy" {
- # statement {
- # sid = ""
- # effect = "Allow"
- # resources = ["*"]
- #
- # actions = [
- # "cloudwatch:*",
- # "dynamodb:*",
- # "ec2:Describe*",
- # "elasticmapreduce:Describe*",
- # "elasticmapreduce:ListBootstrapActions",
- # "elasticmapreduce:ListClusters",
- # "elasticmapreduce:ListInstanceGroups",
- # "elasticmapreduce:ListInstances",
- # "elasticmapreduce:ListSteps",
- # "kinesis:CreateStream",
- # "kinesis:DeleteStream",
- # "kinesis:DescribeStream",
- # "kinesis:GetRecords",
- # "kinesis:GetShardIterator",
- # "kinesis:MergeShards",
- # "kinesis:PutRecord",
- # "kinesis:SplitShard",
- # "rds:Describe*",
- # "s3:*",
- # "sdb:*",
- # "sns:*",
- # "sqs:*",
- # ]
- # }
- #}
- #
- #resource "aws_iam_role_policy" "iam_emr_profile_policy" {
- # name = "iam_emr_profile_policy"
- # role = aws_iam_role.iam_emr_profile_role.id
- #
- # policy = data.aws_iam_policy_document.iam_emr_profile_policy.json
- #}
|