#!/usr/local/bin/python3
'''
ports_scrapper.py

The purpose of the script is to parse all .tf files in the
xdr-terraform-modules git repo. While parsing the .tf files, the open ports
are gathered to help update the SSP.
'''

import logging
import sys
import os
import csv

try:
    import hcl2
except ModuleNotFoundError:
    # Do not terminate the interpreter at import time; the script entry point
    # and parse_tf_files() report the missing dependency when it is needed.
    hcl2 = None

HCL2_MISSING_MESSAGE = "Module 'hcl2' is not installed. Try 'pip3 install python-hcl2'"

# Everything up to and including this marker is stripped from reported paths.
REPO_MARKER = '/xdr-terraform-modules'

# Files whose names end with one of these suffixes are not scanned.
SKIPPED_SUFFIXES = ("vars.tf", "amis.tf", "outputs.tf")


def get_current_dir():
    '''Return the path of the "base/" directory next to this script's folder.

    The path is derived from the location of this file, because each user
    places the git repo at a different location.
    '''
    script_dir = os.path.dirname(os.path.realpath(__file__))
    repo_root = os.path.dirname(script_dir)
    return os.path.join(repo_root, "base/")


def get_files(base_dir):
    '''Return the paths of all .tf files found under base_dir.

    Files whose names end with "vars.tf", "amis.tf" or "outputs.tf" are
    ignored. Directories and files are visited in sorted order so that the
    result does not depend on the filesystem's listing order.
    '''
    print("Looking in " + base_dir +" for TF hcl files")
    r = []
    for root, dirs, files in os.walk(base_dir):
        dirs.sort()  # in-place sort makes os.walk descend in a stable order
        for name in sorted(files):
            if name.endswith(".tf") and not name.endswith(SKIPPED_SUFFIXES):
                r.append(os.path.join(root, name))
    return r


def _collect_port_rows(parsed, source_label):
    '''Return (port, csv_row) pairs for the integer ports in one parsed file.

    Only the top-level "from_port" / "to_port" attributes of each resource
    are examined. When both are present and equal, a single row is produced
    for whichever of the two appears first in the resource.
    '''
    rows = []
    for resources in parsed.get("resource", []):
        for instances in resources.values():
            if not isinstance(instances, dict):
                continue
            for resource_name, body in instances.items():
                if not isinstance(body, dict):
                    continue
                # A missing protocol is reported as an empty field rather
                # than aborting the whole run.
                protocol = str(body.get("protocol", ""))
                same_port = (
                    "from_port" in body
                    and "to_port" in body
                    and body["from_port"] == body["to_port"]
                )
                for item_name, value in body.items():
                    if item_name not in ("from_port", "to_port"):
                        continue
                    # Only plain integers count; bool is excluded because it
                    # is a subclass of int.
                    if not isinstance(value, int) or isinstance(value, bool):
                        continue
                    row = ",".join(
                        (source_label, resource_name, item_name, str(value), protocol)
                    )
                    rows.append((value, row))
                    if same_port:
                        break  # from_port == to_port: report it once
    return rows


def parse_tf_files(files):
    '''Parse the given .tf files and return the port references found.

    Returns a tuple (my_resources, my_resources_dict):
      my_resources      - list of "path,resource_name,attribute,port,protocol"
                          strings, one per port reference.
      my_resources_dict - dict mapping each port number to the last such
                          string seen for that port.

    A file that cannot be read or parsed is reported with a warning and
    skipped. Raises ModuleNotFoundError if there are files to parse but the
    hcl2 module is not installed.
    '''
    my_resources = []
    my_resources_dict = {}
    for file in files:
        if hcl2 is None:
            raise ModuleNotFoundError(HCL2_MISSING_MESSAGE)
        try:
            with open(file, 'r', encoding="utf-8") as open_file:
                parsed = hcl2.load(open_file)
        except Exception:
            # Best effort: one unreadable or unparsable file must not stop
            # the scan, and must not leak a previous file's data.
            print("WARNING: " + file + " This file did not load successfully and was skipped!")
            continue
        # Report paths relative to the repo; fall back to the full path when
        # the checkout directory does not contain the marker.
        _, marker, tail = file.partition(REPO_MARKER)
        source_label = tail if marker else file
        for port, row in _collect_port_rows(parsed, source_label):
            my_resources.append(row)
            my_resources_dict[port] = row
    print("Found "+ str(len(my_resources)) + " port references.")
    return my_resources, my_resources_dict


def dedup_generate_csv(my_resources, my_resources_dict):
    '''Write the port references to two CSV files in the working directory.

    xdr_port_references.csv gets one line per distinct port (the values of
    my_resources_dict); all_xdr_port_references.csv gets every entry of
    my_resources.
    '''
    with open('xdr_port_references.csv', mode='w', encoding="utf-8") as xdr_port_references:
        for row in my_resources_dict.values():
            xdr_port_references.write(row + "\n")
    with open('all_xdr_port_references.csv', mode='w', encoding="utf-8") as xdr_port_references:
        for row in my_resources:
            xdr_port_references.write(row + "\n")


def main():
    '''Scan the repo's base/ directory and write the port reference CSVs.'''
    if hcl2 is None:
        print(HCL2_MISSING_MESSAGE)
        sys.exit(1)  # non-zero: nothing was generated
    correct_path = get_current_dir()
    files = get_files(correct_path)
    my_resources, my_resources_dict = parse_tf_files(files)
    dedup_generate_csv(my_resources, my_resources_dict)
    print("Completed Successfully. Please open csv files.")
    sys.exit(0)


if __name__ == "__main__":
    main()