emr.tf 8.3 KB


  1. #resource "aws_emr_cluster" "cluster" {
  2. # name = "monkeybox-emr-lab"
  3. # release_label = "emr-5.36.0"
  4. # applications = ["Spark"]
  5. # log_uri = "s3://monkeybox-emr-test/logs"
  6. #
  7. # ec2_attributes {
  8. # subnet_id = aws_subnet.main.id
  9. # emr_managed_master_security_group = aws_security_group.allow_access.id
  10. # emr_managed_slave_security_group = aws_security_group.allow_access.id
  11. # instance_profile = aws_iam_instance_profile.emr_profile.arn
  12. # }
  13. #
  14. # master_instance_group {
  15. # name = "monkeybox-emr-lab-master"
  16. # bid_price = "0.07" # Spot bid in USD per instance-hour. Don't set this above the instance's hourly cost!
  17. # instance_type = "m5.xlarge"
  18. # }
  19. #
  20. # core_instance_group {
  21. # name = "monkeybox-emr-lab-core"
  22. # bid_price = "0.07" # Spot bid in USD per instance-hour. Don't set this above the instance's hourly cost!
  23. # instance_count = 2
  24. # instance_type = "m5.xlarge"
  25. # }
  26. #
  27. # tags = {
  28. # project = "monkeybox_emr_lab"
  29. # }
  30. #
  31. # bootstrap_action {
  32. # path = "s3://us-east-2.elasticmapreduce/bootstrap-actions/run-if"
  33. # name = "runif"
  34. # args = ["instance.isMaster=true", "echo running on master node"]
  35. # }
  36. #
  37. # auto_termination_policy {
  38. # idle_timeout = 60
  39. # }
  40. #
  41. # step {
  42. # name = "Install modules"
  43. # action_on_failure = "CONTINUE"
  44. #
  45. # hadoop_jar_step {
  46. # jar = "command-runner.jar"
  47. # args = ["sudo", "python3", "-m", "pip", "install", "numpy", "matplotlib", "pandas", "seaborn", "pyspark"]
  48. # }
  49. # }
  50. #
  51. # step {
  52. # name = "Copy script file from s3."
  53. # action_on_failure = "CONTINUE"
  54. #
  55. # hadoop_jar_step {
  56. # jar = "command-runner.jar"
  57. # args = ["aws", "s3", "cp", "s3://monkeybox-emr-test/health_violations.py", "/home/hadoop/"]
  58. # }
  59. # }
  60. #
  61. # step {
  62. # name = "Pyspark Job."
  63. # action_on_failure = "CONTINUE"
  64. #
  65. # hadoop_jar_step {
  66. # jar = "command-runner.jar"
  67. # args = ["sudo", "python3", "/home/hadoop/health_violations.py", "--data_source", "s3://monkeybox-emr-test/food_establishment_data.csv", "--output_uri", "s3://monkeybox-emr-test/output/"]
  68. # }
  69. # }
  70. #
  71. # # configurations_json = <<EOF
  72. # #[
  73. # # {
  74. # # "Classification": "hadoop-env",
  75. # # "Configurations": [
  76. # # {
  77. # # "Classification": "export",
  78. # # "Properties": {
  79. # # "JAVA_HOME": "/usr/lib/jvm/java-1.8.0"
  80. # # }
  81. # # }
  82. # # ],
  83. # # "Properties": {}
  84. # # },
  85. # # {
  86. # # "Classification": "spark-env",
  87. # # "Configurations": [
  88. # # {
  89. # # "Classification": "export",
  90. # # "Properties": {
  91. # # "JAVA_HOME": "/usr/lib/jvm/java-1.8.0"
  92. # # }
  93. # # }
  94. # # ],
  95. # # "Properties": {}
  96. # # }
  97. # #]
  98. # #EOF
  99. #
  100. # service_role = aws_iam_role.iam_emr_service_role.arn
  101. #}
  102. #
  103. #resource "aws_security_group" "allow_access" {
  104. # name = "allow_access"
  105. # description = "Allow inbound traffic"
  106. # vpc_id = aws_vpc.main.id
  107. #
  108. # ingress {
  109. # from_port = 0
  110. # to_port = 0
  111. # protocol = "-1"
  112. # cidr_blocks = [aws_vpc.main.cidr_block]
  113. # }
  114. #
  115. # egress {
  116. # from_port = 0
  117. # to_port = 0
  118. # protocol = "-1"
  119. # cidr_blocks = ["0.0.0.0/0"]
  120. # }
  121. #
  122. # depends_on = [aws_subnet.main]
  123. #
  124. # lifecycle {
  125. # ignore_changes = [
  126. # ingress,
  127. # egress,
  128. # ]
  129. # }
  130. #
  131. # tags = {
  132. # project = "monkeybox_emr_lab"
  133. # }
  134. #}
  135. #
  136. #resource "aws_vpc" "main" {
  137. # cidr_block = "172.16.0.0/16"
  138. # enable_dns_hostnames = true
  139. #
  140. # tags = {
  141. # name = "monkeybox_emr_lab"
  142. # project = "monkeybox_emr_lab"
  143. # }
  144. #}
  145. #
  146. #resource "aws_vpc_endpoint_route_table_association" "example" {
  147. # route_table_id = aws_route_table.r.id
  148. # vpc_endpoint_id = aws_vpc_endpoint.s3.id
  149. #}
  150. #
  151. #resource "aws_subnet" "main" {
  152. # vpc_id = aws_vpc.main.id
  153. # cidr_block = "172.16.0.0/20"
  154. # map_public_ip_on_launch = true
  155. #
  156. # tags = {
  157. # name = "monkeybox_emr_lab"
  158. # project = "monkeybox_emr_lab"
  159. # }
  160. #}
  161. #
  162. #resource "aws_internet_gateway" "gw" {
  163. # vpc_id = aws_vpc.main.id
  164. #}
  165. #
  166. #resource "aws_vpc_endpoint" "s3" {
  167. # vpc_id = aws_vpc.main.id
  168. # service_name = "com.amazonaws.us-east-2.s3"
  169. #
  170. # tags = {
  171. # project = "monkeybox_emr_lab"
  172. # }
  173. #}
  174. #
  175. #resource "aws_route_table" "r" {
  176. # vpc_id = aws_vpc.main.id
  177. #
  178. # route {
  179. # cidr_block = "0.0.0.0/0"
  180. # gateway_id = aws_internet_gateway.gw.id
  181. # }
  182. #}
  183. #
  184. #resource "aws_main_route_table_association" "a" {
  185. # vpc_id = aws_vpc.main.id
  186. # route_table_id = aws_route_table.r.id
  187. #}
  188. #
  189. ####
  190. ## IAM role setup
  191. ####
  192. #
  193. ## IAM role for EMR Service
  194. #resource "aws_iam_role" "iam_emr_service_role" {
  195. # name = "iam_emr_service_role"
  196. #
  197. # assume_role_policy = <<EOF
  198. #{
  199. # "Version": "2008-10-17",
  200. # "Statement": [
  201. # {
  202. # "Sid": "",
  203. # "Effect": "Allow",
  204. # "Principal": {
  205. # "Service": "elasticmapreduce.amazonaws.com"
  206. # },
  207. # "Action": "sts:AssumeRole"
  208. # }
  209. # ]
  210. #}
  211. #EOF
  212. #}
  213. #
  214. #data "aws_iam_policy_document" "iam_emr_service_policy" {
  215. # statement {
  216. # sid = ""
  217. # effect = "Allow"
  218. # resources = ["*"]
  219. #
  220. # actions = [
  221. # "ec2:AuthorizeSecurityGroupEgress",
  222. # "ec2:AuthorizeSecurityGroupIngress",
  223. # "ec2:CancelSpotInstanceRequests",
  224. # "ec2:CreateNetworkInterface",
  225. # "ec2:CreateSecurityGroup",
  226. # "ec2:CreateTags",
  227. # "ec2:DeleteNetworkInterface",
  228. # "ec2:DeleteSecurityGroup",
  229. # "ec2:DeleteTags",
  230. # "ec2:DescribeAvailabilityZones",
  231. # "ec2:DescribeAccountAttributes",
  232. # "ec2:DescribeDhcpOptions",
  233. # "ec2:DescribeInstanceStatus",
  234. # "ec2:DescribeInstances",
  235. # "ec2:DescribeKeyPairs",
  236. # "ec2:DescribeNetworkAcls",
  237. # "ec2:DescribeNetworkInterfaces",
  238. # "ec2:DescribePrefixLists",
  239. # "ec2:DescribeRouteTables",
  240. # "ec2:DescribeSecurityGroups",
  241. # "ec2:DescribeSpotInstanceRequests",
  242. # "ec2:DescribeSpotPriceHistory",
  243. # "ec2:DescribeSubnets",
  244. # "ec2:DescribeVpcAttribute",
  245. # "ec2:DescribeVpcEndpoints",
  246. # "ec2:DescribeVpcEndpointServices",
  247. # "ec2:DescribeVpcs",
  248. # "ec2:DetachNetworkInterface",
  249. # "ec2:ModifyImageAttribute",
  250. # "ec2:ModifyInstanceAttribute",
  251. # "ec2:RequestSpotInstances",
  252. # "ec2:RevokeSecurityGroupEgress",
  253. # "ec2:RunInstances",
  254. # "ec2:TerminateInstances",
  255. # "ec2:DeleteVolume",
  256. # "ec2:DescribeVolumeStatus",
  257. # "ec2:DescribeVolumes",
  258. # "ec2:DetachVolume",
  259. # "iam:GetRole",
  260. # "iam:GetRolePolicy",
  261. # "iam:ListInstanceProfiles",
  262. # "iam:ListRolePolicies",
  263. # "iam:PassRole",
  264. # "s3:*",
  265. # "sdb:BatchPutAttributes",
  266. # "sdb:Select",
  267. # "sqs:CreateQueue",
  268. # "sqs:Delete*",
  269. # "sqs:GetQueue*",
  270. # "sqs:PurgeQueue",
  271. # "sqs:ReceiveMessage",
  272. # ]
  273. # }
  274. #}
  275. #
  276. #resource "aws_iam_role_policy" "iam_emr_service_policy" {
  277. # name = "iam_emr_service_policy"
  278. # role = aws_iam_role.iam_emr_service_role.id
  279. #
  280. # policy = data.aws_iam_policy_document.iam_emr_service_policy.json
  281. #}
  282. #
  283. ## IAM role for the EC2 instance profile
  284. #resource "aws_iam_role" "iam_emr_profile_role" {
  285. # name = "iam_emr_profile_role"
  286. #
  287. # assume_role_policy = <<EOF
  288. #{
  289. # "Version": "2008-10-17",
  290. # "Statement": [
  291. # {
  292. # "Sid": "",
  293. # "Effect": "Allow",
  294. # "Principal": {
  295. # "Service": "ec2.amazonaws.com"
  296. # },
  297. # "Action": "sts:AssumeRole"
  298. # }
  299. # ]
  300. #}
  301. #EOF
  302. #}
  303. #
  304. #resource "aws_iam_instance_profile" "emr_profile" {
  305. # name = "emr_profile"
  306. # role = aws_iam_role.iam_emr_profile_role.name
  307. #}
  308. #
  309. #data "aws_iam_policy_document" "iam_emr_profile_policy" {
  310. # statement {
  311. # sid = ""
  312. # effect = "Allow"
  313. # resources = ["*"]
  314. #
  315. # actions = [
  316. # "cloudwatch:*",
  317. # "dynamodb:*",
  318. # "ec2:Describe*",
  319. # "elasticmapreduce:Describe*",
  320. # "elasticmapreduce:ListBootstrapActions",
  321. # "elasticmapreduce:ListClusters",
  322. # "elasticmapreduce:ListInstanceGroups",
  323. # "elasticmapreduce:ListInstances",
  324. # "elasticmapreduce:ListSteps",
  325. # "kinesis:CreateStream",
  326. # "kinesis:DeleteStream",
  327. # "kinesis:DescribeStream",
  328. # "kinesis:GetRecords",
  329. # "kinesis:GetShardIterator",
  330. # "kinesis:MergeShards",
  331. # "kinesis:PutRecord",
  332. # "kinesis:SplitShard",
  333. # "rds:Describe*",
  334. # "s3:*",
  335. # "sdb:*",
  336. # "sns:*",
  337. # "sqs:*",
  338. # ]
  339. # }
  340. #}
  341. #
  342. #resource "aws_iam_role_policy" "iam_emr_profile_policy" {
  343. # name = "iam_emr_profile_policy"
  344. # role = aws_iam_role.iam_emr_profile_role.id
  345. #
  346. # policy = data.aws_iam_policy_document.iam_emr_profile_policy.json
  347. #}