Kaynağa Gözat

Disables the IMDSv2 Token Requirement for Splunk Servers; Adds Some Support for IMDSv2

To be tagged v4.2.3
Fred Damstra [afs macbook] 3 yıl önce
ebeveyn
işleme
bd6e0a02e8

+ 2 - 1
base/customer_portal/cloud-init/cloud-init.tpl

@@ -51,7 +51,8 @@ growpart:
   ignore_growroot_disabled: false
 
 bootcmd:
- - "INSTANCE_ID=`/usr/bin/curl -f --connect-timeout 1 --silent http://169.254.169.254/latest/meta-data/instance-id | tail -c 3`"
+ - "IMDS2_TOKEN=`curl --silent --fail -X PUT --connect-timeout 1 --max-time 2 'http://169.254.169.254/latest/api/token' -H 'X-aws-ec2-metadata-token-ttl-seconds: 90'`"
+ - "INSTANCE_ID=`/usr/bin/curl -f --connect-timeout 1 -H X-aws-ec2-metadata-token:\\ $IMDS2_TOKEN --silent http://169.254.169.254/latest/meta-data/instance-id | tail -c 3`"
  - "/bin/hostnamectl set-hostname customer-portal-$INSTANCE_ID'.${zone}'"
  - "echo customer-portal-$INSTANCE_ID'.${zone}' > /etc/salt/minion_id"
 

+ 1 - 1
base/splunk_servers/cluster_master/main.tf

@@ -36,7 +36,7 @@ resource "aws_instance" "instance" {
   iam_instance_profile                 = module.instance_profile.profile_id
 
   metadata_options {
-    http_tokens = "required"
+    http_tokens = "optional" # tfsec:ignore:aws-ec2-enforce-http-token-imds Splunk uses v1 by default. MSOCI-2150
   }
 
 

+ 1 - 1
base/splunk_servers/customer_searchhead/main.tf

@@ -38,7 +38,7 @@ resource "aws_instance" "instance" {
   iam_instance_profile                 = module.instance_profile.profile_id
 
   metadata_options {
-    http_tokens = "required"
+    http_tokens = "optional" # tfsec:ignore:aws-ec2-enforce-http-token-imds Splunk uses v1 by default. MSOCI-2150
   }
 
   ami = local.ami_map[local.ami_selection]

+ 1 - 2
base/splunk_servers/heavy_forwarder/main.tf

@@ -35,9 +35,8 @@ resource "aws_instance" "instance" {
   monitoring                           = false
   iam_instance_profile                 = module.instance_profile.profile_id
 
-  # tfsec:ignore:aws-ec2-enforce-http-token-imds TODO: Test if HF works with tokens. Not willing to introduce a possible breaking change here.
   metadata_options {
-    http_tokens = "optional"
+    http_tokens = "optional" # tfsec:ignore:aws-ec2-enforce-http-token-imds Splunk uses v1 by default. AWS Addon doesn't support it at all? MSOCI-2150
   }
 
 

+ 3 - 3
base/splunk_servers/indexer_cluster/asg.tf

@@ -17,7 +17,7 @@ module "indexer0" {
   tags                       = merge(var.standard_tags, var.tags, var.instance_tags[0], { Name = "${local.asg_name}-0" })
 
   # 2022-04-22: FTD - Debugging dying indexers in test
-  suspended_processes = var.environment == "test" ? ["Terminate"] : []
+  #suspended_processes = var.environment == "test" ? ["Terminate"] : []
 }
 
 module "indexer1" {
@@ -39,7 +39,7 @@ module "indexer1" {
   tags                       = merge(var.standard_tags, var.tags, var.instance_tags[1], { Name = "${local.asg_name}-1" })
 
   # 2022-04-22: FTD - Debugging dying indexers in test
-  suspended_processes = var.environment == "test" ? ["Terminate"] : []
+  #suspended_processes = var.environment == "test" ? ["Terminate"] : []
 }
 
 module "indexer2" {
@@ -61,5 +61,5 @@ module "indexer2" {
   tags                       = merge(var.standard_tags, var.tags, var.instance_tags[2], { Name = "${local.asg_name}-2" })
 
   # 2022-04-22: FTD - Debugging dying indexers in test
-  suspended_processes = var.environment == "test" ? ["Terminate"] : []
+  #suspended_processes = var.environment == "test" ? ["Terminate"] : []
 }

+ 2 - 1
base/splunk_servers/indexer_cluster/cloud-init/cloud-init.tpl

@@ -61,7 +61,8 @@ growpart:
   ignore_growroot_disabled: false
 
 bootcmd:
- - "INSTANCE_ID=`/usr/bin/curl -f --connect-timeout 1 --silent http://169.254.169.254/latest/meta-data/instance-id | tail -c 3`"
+ - "IMDS2_TOKEN=`curl --silent --fail -X PUT --connect-timeout 1 --max-time 2 'http://169.254.169.254/latest/api/token' -H 'X-aws-ec2-metadata-token-ttl-seconds: 90'`"
+ - "INSTANCE_ID=`/usr/bin/curl -f --connect-timeout 1 -H X-aws-ec2-metadata-token:\\ $IMDS2_TOKEN --silent http://169.254.169.254/latest/meta-data/instance-id | tail -c 3`"
  - "/bin/hostnamectl set-hostname ${prefix}-splunk-idx-$INSTANCE_ID'.${zone}'"
  - "echo ${prefix}-splunk-idx-$INSTANCE_ID'.${zone}' > /etc/salt/minion_id"
 

+ 3 - 2
base/splunk_servers/indexer_cluster/cloud-init/nvme-setup.sh

@@ -18,11 +18,12 @@ HOT_LV_NAME="lv_splunkhot"
 # ephemeral disks are available
 HOT_EBS_VOLUMES="xvdg xvdh"
 
-CURL="curl -f --connect-timeout 1 --silent"
+IMDS2_TOKEN=$( curl --silent --fail -X PUT --connect-timeout 1 --max-time 2 'http://169.254.169.254/latest/api/token' -H 'X-aws-ec2-metadata-token-ttl-seconds: 90' )
+CURL="curl -f --connect-timeout 1 --silent"
 declare -A EBSMAP
 
 # Yes heavy assumption we're on AWS
-INSTANCE_TYPE="`${CURL} http://169.254.169.254/latest/meta-data/instance-type`"
+INSTANCE_TYPE="`${CURL} -H X-aws-ec2-metadata-token:\ ${IMDS2_TOKEN} http://169.254.169.254/latest/meta-data/instance-type`"
 if [[ "$INSTANCE_TYPE" == "" ]]; then
 	echo "Could not figure out instance type, giving up"
 	exit 1

+ 1 - 1
base/splunk_servers/searchhead/main.tf

@@ -39,7 +39,7 @@ resource "aws_instance" "instance" {
   iam_instance_profile                 = local.is_moose ? module.moose_instance_profile[0].profile_id : "splunk-sh-instance-profile"
 
   metadata_options {
-    http_tokens = "required"
+    http_tokens = "optional" # tfsec:ignore:aws-ec2-enforce-http-token-imds Splunk uses v1 by default. MSOCI-2150
   }
 
   ami = local.ami_map[local.ami_selection]

+ 26 - 1
submodules/splunk/splunk_indexer_asg/main.tf

@@ -38,7 +38,7 @@ resource "aws_launch_template" "splunk_indexer" {
 
   metadata_options {
     http_endpoint = "enabled"
-    http_tokens   = "required"
+    http_tokens   = "optional" #tfsec:ignore:aws-autoscaling-enforce-http-token-imds Smartstore needs to be configured to use imdsv2, MSOCI-2150
   }
 
   network_interfaces {
@@ -207,5 +207,30 @@ resource "aws_autoscaling_group" "splunk_indexer_asg" {
   # how long to wait for a healthy instance. Default is 10m, which sucks when troubleshooting, but larger instances need it
   #wait_for_capacity_timeout = "1m"
 
+  # Default metrics for ASG
+  enabled_metrics = [
+    "GroupAndWarmPoolDesiredCapacity",
+    "GroupAndWarmPoolTotalCapacity",
+    "GroupDesiredCapacity",
+    "GroupInServiceCapacity",
+    "GroupInServiceInstances",
+    "GroupMaxSize",
+    "GroupMinSize",
+    "GroupPendingCapacity",
+    "GroupPendingInstances",
+    "GroupStandbyCapacity",
+    "GroupStandbyInstances",
+    "GroupTerminatingCapacity",
+    "GroupTerminatingInstances",
+    "GroupTotalCapacity",
+    "GroupTotalInstances",
+    "WarmPoolDesiredCapacity",
+    "WarmPoolMinSize",
+    "WarmPoolPendingCapacity",
+    "WarmPoolTerminatingCapacity",
+    "WarmPoolTotalCapacity",
+    "WarmPoolWarmedCapacity",
+  ]
+
   suspended_processes = var.suspended_processes
 }