ports_scrapper.py 4.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
  1. #!/usr/local/bin/python3
  2. '''
  3. ports_scrapper.py
  4. The purpose of the script is to parse all .tf files in the xdr-terraform-modules git repo.
  5. While parsing the .tf files, the open ports are gathered to help update the SSP.
  6. '''
  7. import logging
  8. import sys
  9. import os
  10. import csv
  11. try:
  12. import hcl2
  13. except ModuleNotFoundError:
  14. print("Module 'hcl2' is not installed. Try 'pip3 install python-hcl2'")
  15. sys.exit()
  16. def get_current_dir():
  17. '''Pulls current directory and returns it. Each user places the git repo at a diff location'''
  18. current_dir = os.path.dirname(os.path.realpath(__file__))
  19. relative_path = os.path.dirname(current_dir)
  20. correct_path = os.path.join(relative_path, "base/")
  21. return correct_path
  22. def get_files(base_dir):
  23. '''Gathers all the .tf files in a list and ignores some files '''
  24. print("Looking in " + base_dir +" for TF hcl files")
  25. r = []
  26. for root, dirs, files in os.walk(base_dir):
  27. for name in files:
  28. filepath = root + os.sep + name
  29. if filepath.endswith(".tf") and not filepath.endswith("vars.tf") and not filepath.endswith("amis.tf") and not filepath.endswith("outputs.tf"):
  30. r.append(os.path.join(root, name))
  31. return r
  32. def parse_tf_files(files):
  33. '''Parses the .tf files and returns the ports'''
  34. my_resources = []
  35. my_resources_dict = {}
  36. for file in files:
  37. #print(file)
  38. with open(file, 'r') as open_file:
  39. try:
  40. dict = hcl2.load(open_file)
  41. except:
  42. print("WARNING: " + file + " This file did not load successfully and was skipped!")
  43. min_file = file.split('/xdr-terraform-modules')
  44. for resources in dict.get("resource", []):
  45. for resource in resources.keys():
  46. for resource_name in resources[resource].keys():
  47. for item_name in resources[resource][resource_name].keys():
  48. if item_name == "from_port" or item_name == "to_port": #only look at the ports
  49. if type(resources[resource][resource_name][item_name]) is int: #only grab it if is is a number
  50. if resources[resource][resource_name]["from_port"] == resources[resource][resource_name]["to_port"]:
  51. #if the from_port and to_port are the same only grab it once
  52. my_resources.append(min_file[1] + "," + resource_name + "," + item_name + "," + str(resources[resource][resource_name][item_name]) + "," + resources[resource][resource_name]["protocol"])
  53. dict_input = min_file[1] + "," + resource_name + "," + item_name + "," + str(resources[resource][resource_name][item_name]) + "," + resources[resource][resource_name]["protocol"]
  54. my_resources_dict[resources[resource][resource_name][item_name]] = dict_input
  55. break
  56. else:
  57. #grab both from_port and to_port
  58. my_resources.append(min_file[1] + "," + resource_name + "," + item_name + "," + str(resources[resource][resource_name][item_name]) + "," + resources[resource][resource_name]["protocol"])
  59. dict_input = min_file[1] + "," + resource_name + "," + item_name + "," + str(resources[resource][resource_name][item_name]) + "," + resources[resource][resource_name]["protocol"]
  60. my_resources_dict[resources[resource][resource_name][item_name]] = dict_input
  61. print("Found "+ str(len(my_resources)) + " port references.")
  62. return my_resources, my_resources_dict
  63. def dedup_generate_csv(my_resources, my_resources_dict):
  64. '''Cleans up the data and outputs it to a file.'''
  65. with open('xdr_port_references.csv', mode='w') as xdr_port_references:
  66. #writer = csv.writer(xdr_port_references)
  67. for item in my_resources_dict:
  68. xdr_port_references.write(my_resources_dict[item] + "\n")
  69. #print(item)
  70. with open('all_xdr_port_references.csv', mode='w') as xdr_port_references:
  71. #writer = csv.writer(xdr_port_references)
  72. for item in my_resources:
  73. xdr_port_references.write(item+ "\n")
  74. #print(item)
  75. if __name__ == "__main__":
  76. correct_path = get_current_dir()
  77. files = get_files(correct_path)
  78. my_resources, my_resources_dict = parse_tf_files(files)
  79. dedup_generate_csv(my_resources, my_resources_dict)
  80. print("Completed Successfully. Please open csv files.")
  81. sys.exit(0)