Files
Gainsight/Scripts/Anthology/find_domain_names.py

40 lines
1.2 KiB
Python

import requests
from bs4 import BeautifulSoup
from domains_list import DOMAINS
# Separators commonly used to join the parts of a page title, tried in order.
_TITLE_SEPARATORS = (" | ", " \\ ", " / ", " - ", ": ", " : ")

# Lower-case words that mark a title part as an institution name.
_ACADEMIC_KEYWORDS = ("university", "academy", "college", "centre", "institute")


def _pick_institution_segment(title):
    """Return the part of *title* that mentions an academic keyword.

    Each separator is tried in turn; the first part (in title order) that
    contains a keyword, compared case-insensitively, is returned stripped.
    When no part qualifies, *title* is returned unchanged.
    """
    for separator in _TITLE_SEPARATORS:
        if separator not in title:
            continue
        for segment in title.split(separator):
            lowered = segment.casefold()
            if any(keyword in lowered for keyword in _ACADEMIC_KEYWORDS):
                return segment.strip()
    return title


def get_college_name(url, timeout=10):
    """Fetch a domain's home page and guess the institution name from its title.

    Requests ``https://{url}``, reads the page's ``<title>`` and, when the
    title is made of several parts joined by a common separator
    (e.g. ``"Home | Example University"``), returns the first part that
    mentions an academic keyword. If no part does, the whole title is
    returned.

    Args:
        url: Bare host name without a scheme, e.g. ``"example.edu"``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The guessed name with surrounding whitespace stripped, or ``None``
        when the page cannot be fetched or has no non-empty ``<title>``.
    """
    try:
        # A timeout keeps one unresponsive host from stalling the whole run.
        response = requests.get(f"https://{url}", timeout=timeout)
    except requests.RequestException:
        # Best effort: an unreachable or invalid domain simply has no name.
        return None

    title_tag = BeautifulSoup(response.content, "html.parser").find("title")
    if title_tag is None:
        return None

    title = title_tag.text.strip()
    if not title:
        return None
    return _pick_institution_segment(title)
def main():
    """Look up a name for every domain in ``DOMAINS`` and record the hits.

    Each domain that yields a name is appended to ``domains_w_names.py`` as
    a ``domain: name`` line. Every domain is echoed to stdout together with
    its result (``None`` when no name was found).
    """
    for domain in DOMAINS:
        name = get_college_name(domain)
        if name is not None:
            # Open in append mode per hit: the context manager closes (and so
            # flushes) the file each time, even if the write raises, so names
            # already found are kept if the run is interrupted.
            with open("domains_w_names.py", "a", encoding="utf-8") as out_file:
                out_file.write(f"{domain}: {name} \n")
        print(f"{domain} - {name}")


# Run the lookup only when executed as a script, not when imported.
if __name__ == "__main__":
    main()