123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101 |
- #resource "aws_emr_cluster" "cluster" {
- # name = "monkeybox-emr-lab"
- # release_label = "emr-5.36.0"
- # applications = ["Spark"]
- # log_uri = "s3://monkeybox-emr-test/logs"
- #
- # ec2_attributes {
- # subnet_id = aws_subnet.main.id
- # emr_managed_master_security_group = aws_security_group.allow_access.id
- # emr_managed_slave_security_group = aws_security_group.allow_access.id
- # instance_profile = aws_iam_instance_profile.emr_profile.arn
- # }
- #
- # master_instance_group {
- # name = "monkeybox-emr-lab-master"
- # bid_price = "0.07" # Spot bid ceiling in USD per instance-hour; keep it at or below the m5.xlarge On-Demand hourly rate.
- # instance_type = "m5.xlarge"
- # }
- #
- # core_instance_group {
- # name = "monkeybox-emr-lab-core"
- # bid_price = "0.07" # Spot bid ceiling in USD per instance-hour; keep it at or below the m5.xlarge On-Demand hourly rate.
- # instance_count = 2
- # instance_type = "m5.xlarge"
- # }
- #
- # tags = {
- # project = "monkeybox_emr_lab"
- # }
- #
- # bootstrap_action {
- # path = "s3://us-east-2.elasticmapreduce/bootstrap-actions/run-if"
- # name = "runif"
- # args = ["instance.isMaster=true", "echo running on master node"]
- # }
- #
- # auto_termination_policy {
- # idle_timeout = 60
- # }
- #
- # step {
- # name = "Install modules"
- # action_on_failure = "CONTINUE"
- #
- # hadoop_jar_step {
- # jar = "command-runner.jar"
- # args = ["sudo", "python3", "-m", "pip", "install", "numpy", "matplotlib", "pandas", "seaborn", "pyspark"]
- # }
- # }
- #
- # step {
- # name = "Copy script file from s3."
- # action_on_failure = "CONTINUE"
- #
- # hadoop_jar_step {
- # jar = "command-runner.jar"
- # args = ["aws", "s3", "cp", "s3://monkeybox-emr-test/health_violations.py", "/home/hadoop/"]
- # }
- # }
- #
- # step {
- # name = "Pyspark Job."
- # action_on_failure = "CONTINUE"
- #
- # hadoop_jar_step {
- # jar = "command-runner.jar"
- # args = ["sudo", "python3", "/home/hadoop/health_violations.py", "--data_source", "s3://monkeybox-emr-test/food_establishment_data.csv", "--output_uri", "s3://monkeybox-emr-test/output/"]
- # }
- # }
- #
- # # configurations_json = <<EOF
- # #[
- # # {
- # # "Classification": "hadoop-env",
- # # "Configurations": [
- # # {
- # # "Classification": "export",
- # # "Properties": {
- # # "JAVA_HOME": "/usr/lib/jvm/java-1.8.0"
- # # }
- # # }
- # # ],
- # # "Properties": {}
- # # },
- # # {
- # # "Classification": "spark-env",
- # # "Configurations": [
- # # {
- # # "Classification": "export",
- # # "Properties": {
- # # "JAVA_HOME": "/usr/lib/jvm/java-1.8.0"
- # # }
- # # }
- # # ],
- # # "Properties": {}
- # # }
- # #]
- # #EOF
- #
- # service_role = aws_iam_role.iam_emr_service_role.arn
- #}
|