فهرست منبع

Initial Commit

Fred Damstra (Macbook 2015) 2 سال پیش
کامیت
2d4f332b7a
12فایلهای تغییر یافته به همراه471 افزوده شده و 0 حذف شده
  1. 6 0
      .gitignore
  2. 27 0
      .pre-commit-config.yaml
  3. 22 0
      .terraform.lock.hcl
  4. 14 0
      .tflint.hcl
  5. 3 0
      .tfsec.yaml
  6. 9 0
      backend.tf
  7. 15 0
      config.tf
  8. 347 0
      emr.tf
  9. 0 0
      notes.md
  10. 6 0
      output.tf
  11. 13 0
      provider.tf
  12. 9 0
      required_providers.tf

+ 6 - 0
.gitignore

@@ -0,0 +1,6 @@
+.terraform
+
+*.bak
+.*.swp
+
+tmp*

+ 27 - 0
.pre-commit-config.yaml

@@ -0,0 +1,27 @@
+repos:
+- repo: https://github.com/gruntwork-io/pre-commit
+  rev: v0.1.17 # Get the latest from: https://github.com/gruntwork-io/pre-commit/releases
+  hooks:
+    - id: tflint # first pass: runs tflint with --init (plugin installation) before the lint pass below
+      args:
+        - "--init"
+        - "--config=.tflint.hcl"
+    - id: tflint # second pass: the actual lint run, using the same config file
+      args:
+        #        - "--module"
+        - "--config=.tflint.hcl"
+    - id: terraform-validate
+    - id: terraform-fmt
+- repo: https://github.com/antonbabenko/pre-commit-terraform
+  rev: v1.76.0 # Get the latest from: https://github.com/antonbabenko/pre-commit-terraform/releases
+  hooks:
+    - id: terraform_tfsec
+      args:
+        - --args=--config-file .tfsec.yaml
+    - id: terraform_docs
+# checkov is good, but too thorough for our needs; kept here, disabled, for reference
+#    - id: terraform_checkov
+#      args:
+#        - --args=--quiet
+#        - --args=--skip-check CKV_AWS_144 # we don't cross-region replicate our s3
+#        - --args=--skip-check CKV_AWS_150 # we do not enable deletion protection for LBs

+ 22 - 0
.terraform.lock.hcl

@@ -0,0 +1,22 @@
+# This file is maintained automatically by "terraform init".
+# Manual edits may be lost in future updates.
+
+provider "registry.terraform.io/hashicorp/aws" {
+  version     = "4.35.0"
+  constraints = "~> 4.0"
+  hashes = [
+    "h1:YWGliEq8S7vVrR+I/lwr9GcyVctB1n9/Qz7eElKrXbg=",
+    "zh:045c9c113311a358e6f311a6d7c67f4a18a53d6468a5dbe4ad4d1c5a3cf089cf",
+    "zh:43ee43aca5a5377e3b55463c19ab497e24a3653c233151214d1907ff3d7ae749",
+    "zh:5834362e4a402bb2682de4166c340fdc88c910d393c1753c613a526685279083",
+    "zh:64a0066e1893077d70aaa13f2ab7a9e3a5bc676767daa4036088e28c799a5b88",
+    "zh:690cbc4cfad5f74899bd0695896ecd1e9cb3dd362dfcae13701eb5e955409372",
+    "zh:82ebbd737671bf8f4ed85183c4a37115ae7fc6aa9a6213e30509a4f806e593a0",
+    "zh:8b9a92114b09eadd594f8f39edadaa103e640d57a10df3b7283a875d76faf2e4",
+    "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425",
+    "zh:afd2845438edb58d884f4812c3323d07fc9cbcbee811459320a492d54dd4c40f",
+    "zh:d2c66ed9668465342a02a7aef6a4816464c265d8f0207c240eef87a18341c8d9",
+    "zh:e1515394042dca2b583be7b32d41c77e2815520bb9eabdab4d1d21e25fae6f37",
+    "zh:e290fb45993b0ce0dfaf0d88c737e088918cd9b0607827d773c529f04d12502d",
+  ]
+}

+ 14 - 0
.tflint.hcl

@@ -0,0 +1,14 @@
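+# The author expected this plugin to be enabled automatically; enabling it manually breaks it (NOTE(review): likely because the block lacks the `source`/`version` that `tflint --init` needs - confirm).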
+#plugin "aws" {
+#  enabled = true
+#  deep_check = false # deep checking makes api calls to verify select things
+#}
+
+# Custom rules go here
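+# This also breaks (NOTE(review): presumably because the rule is provided by the aws plugin, which is commented out above - confirm)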
+#rule "aws_resource_missing_tags" {
+#  enabled = true
+#  tags = [
+#    "tf_module"
+#  ]
+#}

+ 3 - 0
.tfsec.yaml

@@ -0,0 +1,3 @@
+---
+exclude:
+  - aws-dynamodb-table-customer-key # We don't care about default keys, encryption is fine

+ 9 - 0
backend.tf

@@ -0,0 +1,9 @@
+terraform {
+  backend "s3" { # remote state in S3; backend arguments must be literal values
+    bucket  = "terraform-remote-state-20221017144428493300000001"
+    key     = "monkeybox_emr_lab_jupyter" # literal copy of local.unique_id: backend blocks cannot reference locals/variables
+    region  = "us-east-2"
+    encrypt = true
+    profile = "default"
+  }
+}

+ 15 - 0
config.tf

@@ -0,0 +1,15 @@
+locals {
+  # Unique id intended as the terraform backend state key; duplicates across stacks _will_ be a problem.
+  unique_id = "monkeybox_emr_lab_jupyter"
+
+  # AWS CLI profile and region; both are read by the provider block (local.profile / local.region)
+  profile = "default"
+  region  = "us-east-2"
+  #  tags = {
+  #    "tf_module" : basename(path.root)
+  #  }
+}
+
+# Uncomment if needed
+#data "aws_caller_identity" "current" {}
+#data "aws_partition" "current" {}

+ 347 - 0
emr.tf

@@ -0,0 +1,347 @@
+#resource "aws_emr_cluster" "cluster" {
+#  name          = "monkeybox-emr-lab"
+#  release_label = "emr-5.36.0"
+#  applications  = ["Spark"]
+#  log_uri       = "s3://monkeybox-emr-test/logs"
+#
+#  ec2_attributes {
+#    subnet_id                         = aws_subnet.main.id
+#    emr_managed_master_security_group = aws_security_group.allow_access.id
+#    emr_managed_slave_security_group  = aws_security_group.allow_access.id
+#    instance_profile                  = aws_iam_instance_profile.emr_profile.arn
+#  }
+#
+#  master_instance_group {
+#    name          = "monkeybox-emr-lab-master"
+#    bid_price     = "0.07" # Don't set this over the hourly cost!
+#    instance_type = "m5.xlarge"
+#  }
+#
+#  core_instance_group {
+#    name           = "monkeybox-emr-lab-core"
+#    bid_price      = "0.07" # Don't set this over the hourly cost!
+#    instance_count = 2
+#    instance_type  = "m5.xlarge"
+#  }
+#
+#  tags = {
+#    project = "monkeybox_emr_lab"
+#  }
+#
+#  bootstrap_action {
+#    path = "s3://us-east-2.elasticmapreduce/bootstrap-actions/run-if"
+#    name = "runif"
+#    args = ["instance.isMaster=true", "echo running on master node"]
+#  }
+#
+#  auto_termination_policy {
+#    idle_timeout = 60
+#  }
+#
+#  step {
+#    name              = "Install modules"
+#    action_on_failure = "CONTINUE"
+#
+#    hadoop_jar_step {
+#      jar  = "command-runner.jar"
+#      args = ["sudo", "python3", "-m", "pip", "install", "numpy", "matplotlib", "pandas", "seaborn", "pyspark"]
+#    }
+#  }
+#
+#  step {
+#    name              = "Copy script file from s3."
+#    action_on_failure = "CONTINUE"
+#
+#    hadoop_jar_step {
+#      jar  = "command-runner.jar"
+#      args = ["aws", "s3", "cp", "s3://monkeybox-emr-test/health_violations.py", "/home/hadoop/"]
+#    }
+#  }
+#
+#  step {
+#    name              = "Pyspark Job."
+#    action_on_failure = "CONTINUE"
+#
+#    hadoop_jar_step {
+#      jar  = "command-runner.jar"
+#      args = ["sudo", "python3", "/home/hadoop/health_violations.py", "--data_source", "s3://monkeybox-emr-test/food_establishment_data.csv", "--output_uri", "s3://monkeybox-emr-test/output/"]
+#    }
+#  }
+#
+#  #  configurations_json = <<EOF
+#  #[
+#  #  {
+#  #    "Classification": "hadoop-env",
+#  #    "Configurations": [
+#  #      {
+#  #        "Classification": "export",
+#  #        "Properties": {
+#  #          "JAVA_HOME": "/usr/lib/jvm/java-1.8.0"
+#  #        }
+#  #      }
+#  #    ],
+#  #    "Properties": {}
+#  #  },
+#  #  {
+#  #    "Classification": "spark-env",
+#  #    "Configurations": [
+#  #      {
+#  #        "Classification": "export",
+#  #        "Properties": {
+#  #          "JAVA_HOME": "/usr/lib/jvm/java-1.8.0"
+#  #        }
+#  #      }
+#  #    ],
+#  #    "Properties": {}
+#  #  }
+#  #]
+#  #EOF
+#
+#  service_role = aws_iam_role.iam_emr_service_role.arn
+#}
+#
+#resource "aws_security_group" "allow_access" {
+#  name        = "allow_access"
+#  description = "Allow inbound traffic"
+#  vpc_id      = aws_vpc.main.id
+#
+#  ingress {
+#    from_port   = 0
+#    to_port     = 0
+#    protocol    = "-1"
+#    cidr_blocks = [aws_vpc.main.cidr_block]
+#  }
+#
+#  egress {
+#    from_port   = 0
+#    to_port     = 0
+#    protocol    = "-1"
+#    cidr_blocks = ["0.0.0.0/0"]
+#  }
+#
+#  depends_on = [aws_subnet.main]
+#
+#  lifecycle {
+#    ignore_changes = [
+#      ingress,
+#      egress,
+#    ]
+#  }
+#
+#  tags = {
+#    project = "monkeybox_emr_lab"
+#  }
+#}
+#
+#resource "aws_vpc" "main" {
+#  cidr_block           = "172.16.0.0/16"
+#  enable_dns_hostnames = true
+#
+#  tags = {
+#    name    = "monkeybox_emr_lab"
+#    project = "monkeybox_emr_lab"
+#  }
+#}
+#
+#resource "aws_vpc_endpoint_route_table_association" "example" {
+#  route_table_id  = aws_route_table.r.id
+#  vpc_endpoint_id = aws_vpc_endpoint.s3.id
+#}
+#
+#resource "aws_subnet" "main" {
+#  vpc_id                  = aws_vpc.main.id
+#  cidr_block              = "172.16.0.0/20"
+#  map_public_ip_on_launch = true
+#
+#  tags = {
+#    name    = "monkeybox_emr_lab"
+#    project = "monkeybox_emr_lab"
+#  }
+#}
+#
+#resource "aws_internet_gateway" "gw" {
+#  vpc_id = aws_vpc.main.id
+#}
+#
+#resource "aws_vpc_endpoint" "s3" {
+#  vpc_id       = aws_vpc.main.id
+#  service_name = "com.amazonaws.us-east-2.s3"
+#
+#  tags = {
+#    project = "monkeybox_emr_lab"
+#  }
+#}
+#
+#resource "aws_route_table" "r" {
+#  vpc_id = aws_vpc.main.id
+#
+#  route {
+#    cidr_block = "0.0.0.0/0"
+#    gateway_id = aws_internet_gateway.gw.id
+#  }
+#}
+#
+#resource "aws_main_route_table_association" "a" {
+#  vpc_id         = aws_vpc.main.id
+#  route_table_id = aws_route_table.r.id
+#}
+#
+####
+## IAM Role setups
+####
+#
+## IAM role for EMR Service
+#resource "aws_iam_role" "iam_emr_service_role" {
+#  name = "iam_emr_service_role"
+#
+#  assume_role_policy = <<EOF
+#{
+#  "Version": "2008-10-17",
+#  "Statement": [
+#    {
+#      "Sid": "",
+#      "Effect": "Allow",
+#      "Principal": {
+#        "Service": "elasticmapreduce.amazonaws.com"
+#      },
+#      "Action": "sts:AssumeRole"
+#    }
+#  ]
+#}
+#EOF
+#}
+#
+#data "aws_iam_policy_document" "iam_emr_service_policy" {
+#  statement {
+#    sid       = ""
+#    effect    = "Allow"
+#    resources = ["*"]
+#
+#    actions = [
+#      "ec2:AuthorizeSecurityGroupEgress",
+#      "ec2:AuthorizeSecurityGroupIngress",
+#      "ec2:CancelSpotInstanceRequests",
+#      "ec2:CreateNetworkInterface",
+#      "ec2:CreateSecurityGroup",
+#      "ec2:CreateTags",
+#      "ec2:DeleteNetworkInterface",
+#      "ec2:DeleteSecurityGroup",
+#      "ec2:DeleteTags",
+#      "ec2:DescribeAvailabilityZones",
+#      "ec2:DescribeAccountAttributes",
+#      "ec2:DescribeDhcpOptions",
+#      "ec2:DescribeInstanceStatus",
+#      "ec2:DescribeInstances",
+#      "ec2:DescribeKeyPairs",
+#      "ec2:DescribeNetworkAcls",
+#      "ec2:DescribeNetworkInterfaces",
+#      "ec2:DescribePrefixLists",
+#      "ec2:DescribeRouteTables",
+#      "ec2:DescribeSecurityGroups",
+#      "ec2:DescribeSpotInstanceRequests",
+#      "ec2:DescribeSpotPriceHistory",
+#      "ec2:DescribeSubnets",
+#      "ec2:DescribeVpcAttribute",
+#      "ec2:DescribeVpcEndpoints",
+#      "ec2:DescribeVpcEndpointServices",
+#      "ec2:DescribeVpcs",
+#      "ec2:DetachNetworkInterface",
+#      "ec2:ModifyImageAttribute",
+#      "ec2:ModifyInstanceAttribute",
+#      "ec2:RequestSpotInstances",
+#      "ec2:RevokeSecurityGroupEgress",
+#      "ec2:RunInstances",
+#      "ec2:TerminateInstances",
+#      "ec2:DeleteVolume",
+#      "ec2:DescribeVolumeStatus",
+#      "ec2:DescribeVolumes",
+#      "ec2:DetachVolume",
+#      "iam:GetRole",
+#      "iam:GetRolePolicy",
+#      "iam:ListInstanceProfiles",
+#      "iam:ListRolePolicies",
+#      "iam:PassRole",
+#      "s3:*",
+#      "sdb:BatchPutAttributes",
+#      "sdb:Select",
+#      "sqs:CreateQueue",
+#      "sqs:Delete*",
+#      "sqs:GetQueue*",
+#      "sqs:PurgeQueue",
+#      "sqs:ReceiveMessage",
+#    ]
+#  }
+#}
+#
+#resource "aws_iam_role_policy" "iam_emr_service_policy" {
+#  name = "iam_emr_service_policy"
+#  role = aws_iam_role.iam_emr_service_role.id
+#
+#  policy = data.aws_iam_policy_document.iam_emr_service_policy.json
+#}
+#
+## IAM Role for EC2 Instance Profile
+#resource "aws_iam_role" "iam_emr_profile_role" {
+#  name = "iam_emr_profile_role"
+#
+#  assume_role_policy = <<EOF
+#{
+#  "Version": "2008-10-17",
+#  "Statement": [
+#    {
+#      "Sid": "",
+#      "Effect": "Allow",
+#      "Principal": {
+#        "Service": "ec2.amazonaws.com"
+#      },
+#      "Action": "sts:AssumeRole"
+#    }
+#  ]
+#}
+#EOF
+#}
+#
+#resource "aws_iam_instance_profile" "emr_profile" {
+#  name = "emr_profile"
+#  role = aws_iam_role.iam_emr_profile_role.name
+#}
+#
+#data "aws_iam_policy_document" "iam_emr_profile_policy" {
+#  statement {
+#    sid       = ""
+#    effect    = "Allow"
+#    resources = ["*"]
+#
+#    actions = [
+#      "cloudwatch:*",
+#      "dynamodb:*",
+#      "ec2:Describe*",
+#      "elasticmapreduce:Describe*",
+#      "elasticmapreduce:ListBootstrapActions",
+#      "elasticmapreduce:ListClusters",
+#      "elasticmapreduce:ListInstanceGroups",
+#      "elasticmapreduce:ListInstances",
+#      "elasticmapreduce:ListSteps",
+#      "kinesis:CreateStream",
+#      "kinesis:DeleteStream",
+#      "kinesis:DescribeStream",
+#      "kinesis:GetRecords",
+#      "kinesis:GetShardIterator",
+#      "kinesis:MergeShards",
+#      "kinesis:PutRecord",
+#      "kinesis:SplitShard",
+#      "rds:Describe*",
+#      "s3:*",
+#      "sdb:*",
+#      "sns:*",
+#      "sqs:*",
+#    ]
+#  }
+#}
+#
+#resource "aws_iam_role_policy" "iam_emr_profile_policy" {
+#  name = "iam_emr_profile_policy"
+#  role = aws_iam_role.iam_emr_profile_role.id
+#
+#  policy = data.aws_iam_policy_document.iam_emr_profile_policy.json
+#}

+ 0 - 0
notes.md


+ 6 - 0
output.tf

@@ -0,0 +1,6 @@
+locals { # intentionally empty for now
+}
+
+output "example" { # placeholder output that only emits a fixed string
+  value = "Configure some outputs."
+}

+ 13 - 0
provider.tf

@@ -0,0 +1,13 @@
+# Configure the AWS Provider
+provider "aws" {
+  region  = local.region  # region comes from the shared locals
+  profile = local.profile # named AWS credentials profile, also from the shared locals
+
+  # Disabled for now: the author hoped this might be useful for adding a 'last_applied_by' tag
+  #ignore_tags {
+  #  # specific tag
+  #  keys = ["ChangedAt"]
+  #  # or by prefix to ignore ChangedBy too
+  #  key_prefixes = ["Changed"]
+  #}
+}

+ 9 - 0
required_providers.tf

@@ -0,0 +1,9 @@
+terraform {
+  required_version = ">= 1.0" # any Terraform CLI release from 1.0 onward
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = "~> 4.0" # any 4.x release; the exact version is pinned in .terraform.lock.hcl
+    }
+  }
+}