"""Fetch the home page of every domain in DOMAINS and record a college name.

For each domain the page ``<title>`` is downloaded and reduced to the part
that looks like an institution name; results are appended to
``domains_w_names.py`` and echoed to stdout.
"""

from typing import Optional

import requests
from bs4 import BeautifulSoup

from domains_list import DOMAINS

# Separators commonly used in page titles to join a site name with a tagline.
# Order matters: earlier separators are tried first.
_TITLE_SEPARATORS = (" | ", " \\ ", " / ", " - ", ": ", " : ")

# Case-sensitive substrings that mark a title segment as an institution name.
_ACADEMIC_KEYWORDS = (
    "University",
    "university",
    "Academy",
    "academy",
    "College",
    "college",
    "Centre",
    "centre",
    "institute",
    "Institute",
)

# Seconds to wait for a host before giving up, so that one unresponsive
# domain cannot stall the whole run.
_REQUEST_TIMEOUT = 10


def _pick_academic_segment(title: str) -> str:
    """Return the first separator-delimited part of *title* naming an institution.

    Every separator in ``_TITLE_SEPARATORS`` that occurs in *title* is tried in
    order; the first resulting segment that contains one of
    ``_ACADEMIC_KEYWORDS`` is returned. When no segment qualifies, *title* is
    returned unchanged.
    """
    for separator in _TITLE_SEPARATORS:
        if separator not in title:
            continue
        for segment in title.split(separator):
            if any(keyword in segment for keyword in _ACADEMIC_KEYWORDS):
                return segment
    return title


def get_college_name(url: str) -> Optional[str]:
    """Derive a college name from the ``<title>`` of ``https://{url}``.

    Args:
        url: Host (and optional path) without the scheme; ``https://`` is
            prepended before the request is made.

    Returns:
        The title segment that contains an academic keyword, the whole title
        when no segment does, or ``None`` when the page cannot be fetched or
        has no ``<title>`` element.
    """
    try:
        response = requests.get(f"https://{url}", timeout=_REQUEST_TIMEOUT)
    except requests.RequestException:
        # Best effort: unreachable, timed-out or malformed hosts are skipped.
        return None

    soup = BeautifulSoup(response.content, "html.parser")
    title_tag = soup.find("title")
    if title_tag is None:
        return None

    return _pick_academic_segment(title_tag.text)


def main() -> None:
    """Resolve a name for each domain in DOMAINS, appending hits to the output file."""
    for domain in DOMAINS.keys():
        name = get_college_name(domain)
        if name is None:
            continue
        # Re-open in append mode for every hit so that results gathered so far
        # are on disk even if a later domain aborts the run. UTF-8 is explicit
        # because page titles are not guaranteed to fit the locale encoding.
        with open("domains_w_names.py", "a", encoding="utf-8") as out_file:
            out_file.write(f"{domain}: {name} \n")
        print(f"{domain} - {name}")


if __name__ == "__main__":
    main()