40 lines
1.2 KiB
Python
40 lines
1.2 KiB
Python
import requests
|
|
from bs4 import BeautifulSoup
|
|
from domains_list import DOMAINS
|
|
|
|
def _pick_institution_name(title):
    """Return the part of a page title that looks like an institution name.

    The title is split on each common separator in turn; the first piece
    containing an academic keyword (case-insensitive) is returned with
    surrounding whitespace removed. If no piece matches, the whole title
    is returned unchanged.
    """
    # Separators sites commonly put between their name and a tagline.
    separators = (" | ", " \\ ", " / ", " - ", ": ", " : ")
    # Lower-case only: segments are lower-cased before comparison.
    keywords = ("university", "academy", "college", "centre", "institute")

    for separator in separators:
        if separator not in title:
            continue
        for segment in title.split(separator):
            lowered = segment.lower()
            if any(keyword in lowered for keyword in keywords):
                return segment.strip()
    # No separator produced a keyword-bearing piece: fall back to the title.
    return title


def get_college_name(url, timeout=10):
    """Fetch ``https://<url>`` and derive an institution name from its title.

    Args:
        url: Host name (without scheme) to request over HTTPS.
        timeout: Seconds to wait for the server before giving up, so a
            single unresponsive host cannot stall the caller.

    Returns:
        The title segment that contains an academic keyword, or the whole
        stripped title when no segment matches. ``None`` when the request
        fails or the page has no non-empty ``<title>``.
    """
    try:
        response = requests.get(f"https://{url}", timeout=timeout)
    except requests.RequestException:
        # Best-effort lookup: unreachable or misconfigured hosts are skipped.
        return None

    soup = BeautifulSoup(response.content, "html.parser")
    title_tag = soup.find("title")
    if title_tag is None:
        return None

    title = title_tag.text.strip()
    if not title:
        return None
    return _pick_institution_name(title)
|
|
|
|
def main():
    """Look up a name for every domain in ``DOMAINS`` and record the hits.

    For each domain whose name could be resolved, one ``domain: name`` line
    is appended to ``domains_w_names.py`` and the pair is echoed to stdout.
    Domains for which ``get_college_name`` returns ``None`` are skipped.
    """
    for domain in DOMAINS:
        name = get_college_name(domain)
        if name is None:
            continue
        # Re-open in append mode per record so each result is on disk before
        # the next (slow) network lookup starts; the context manager closes
        # the file even if the write fails.
        with open("domains_w_names.py", "a", encoding="utf-8") as out_file:
            out_file.write(f"{domain}: {name} \n")
        print(f"{domain} - {name}")
|
|
|
|
|
|
# Run the lookup only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|