# Gainsight/Scripts/Anthology/find_domain_names.py

import requests
from bs4 import BeautifulSoup
from domains_list import DOMAINS

def _pick_academic_part(title):
    """Return the segment of *title* that looks like an institution name.

    The title is split on each common separator in turn (e.g.
    ``"Home | Foo University"``); the first segment that contains an
    academic keyword, matched case-insensitively, is returned with
    surrounding whitespace removed. If no separator yields such a segment,
    the whole title is returned unchanged.
    """
    separators = (" | ", " \\ ", " / ", " - ", ": ", " : ")
    keywords = ("university", "academy", "college", "centre", "institute")
    for separator in separators:
        if separator not in title:
            # Try the next separator instead of giving up on the first miss.
            continue
        for part in title.split(separator):
            lowered = part.lower()
            if any(keyword in lowered for keyword in keywords):
                return part.strip()
    return title


def get_college_name(url, timeout=10):
    """Fetch ``https://<url>`` and derive an institution name from its title.

    Args:
        url: Host name (without scheme) of the site to query.
        timeout: Seconds passed to ``requests.get`` so that one unresponsive
            host cannot stall the caller indefinitely.

    Returns:
        The part of the page ``<title>`` that contains an academic keyword
        (University, College, Academy, Centre, Institute), or the whole
        stripped title when no such part is found. ``None`` when the page
        cannot be fetched or has no non-empty ``<title>``.
    """
    try:
        response = requests.get(f"https://{url}", timeout=timeout)
    except (requests.RequestException, ValueError):
        # Best-effort lookup: an unreachable or malformed host is skipped.
        # ValueError is included because malformed host names may surface as
        # ValueError/UnicodeError rather than a requests exception.
        return None

    title_tag = BeautifulSoup(response.content, "html.parser").find("title")
    if title_tag is None:
        return None

    title = title_tag.text.strip()
    if not title:
        return None
    return _pick_academic_part(title)

def main():
    """Look up a name for every domain in ``DOMAINS`` and record the hits.

    Each domain for which ``get_college_name`` returns a name is appended to
    ``domains_w_names.py`` as ``"<domain>: <name> "`` and echoed to stdout.
    Domains with no name (``None``) are skipped.
    """
    for domain in DOMAINS:
        name = get_college_name(domain)
        if name is None:
            continue
        # Re-open in append mode per hit so results already found survive an
        # interrupted run; ``with`` guarantees the handle is closed even if
        # the write fails, and UTF-8 keeps non-ASCII titles writable on any
        # platform.
        with open("domains_w_names.py", "a", encoding="utf-8") as out_file:
            out_file.write(f"{domain}: {name} \n")
        print(f"{domain} - {name}")


# Run the lookup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()