Fixed an error in blacklane's templates (don't try to mix programming languages...). Anthology domains list was finalized and pulled, ready for client review.

This commit is contained in:
Norm Rasmussen
2024-03-15 20:45:21 -04:00
parent f32ce078e1
commit 1394112a9b
7 changed files with 4160 additions and 1117 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,17 +1,39 @@
from requests_html import HTMLSession
import requests
from bs4 import BeautifulSoup
from domains_list import DOMAINS
# Walk DOMAINS, build an https:// URL for each entry, and fetch it through a
# requests_html HTMLSession, leaving the response in `resp`.
# NOTE(review): indentation is missing in this view; presumably the three
# statements below all belong to the loop body — confirm.
# NOTE(review): these lines look like the removed (pre-change) side of the diff
# hunk, superseded by main() further down — confirm before treating as live code.
for domain in DOMAINS:
URL = f"https://{domain}"
session = HTMLSession()
resp = session.get(URL)
def get_college_name(url):
    """Return the institution name advertised in a site's ``<title>``.

    Fetches ``https://{url}`` and reads the page title. Titles are often built
    from several parts joined by a separator ("Home | Foo University"), so the
    title is split on each known separator in turn and the first part that
    contains an academic keyword ("University", "College", ...) is returned.
    When no part matches, the whole title is returned unchanged.

    Args:
        url: Bare host name without a scheme, e.g. ``"example.edu"``.

    Returns:
        The extracted name, or ``None`` when the page could not be fetched or
        has no ``<title>`` element.
    """
    # The backslash is escaped explicitly; the bare " \ " form is an invalid
    # escape sequence that newer Python versions warn about.
    separators = (" | ", " \\ ", " / ", " - ", ": ", " : ")
    keywords = (
        "University", "university", "Academy", "academy", "College", "college",
        "Centre", "centre", "institute", "Institute",
    )

    try:
        # A timeout keeps one unresponsive host from stalling the whole run.
        response = requests.get(f"https://{url}", timeout=10)
    except requests.RequestException:
        # Best effort: an unreachable or misconfigured site yields no name.
        return None

    title_tag = BeautifulSoup(response.content, "html.parser").find("title")
    if title_tag is None:
        return None
    # Titles frequently carry surrounding whitespace/newlines from the markup.
    college_name = title_tag.text.strip()

    for separator in separators:
        if separator not in college_name:
            continue
        for part in college_name.split(separator):
            if any(keyword in part for keyword in keywords):
                return part

    # No segment looked like an institution name; fall back to the full title.
    return college_name
def main():
    """Resolve a display name for every domain and append it to the output file.

    For each entry in ``DOMAINS`` the name is looked up with
    ``get_college_name``. Successful lookups are appended to
    ``domains_w_names.py`` as ``"<domain>: <name> "`` lines, and every domain
    is echoed to stdout so failed lookups are visible as ``None``.
    """
    for domain in DOMAINS:
        name = get_college_name(domain)
        if name is not None:
            # Re-open in append mode per domain so results already written
            # survive an interrupted run; the context manager guarantees the
            # handle is closed even if the write fails. UTF-8 is explicit
            # because page titles may contain non-ASCII characters.
            with open("domains_w_names.py", "a", encoding="utf-8") as out_file:
                out_file.write(f"{domain}: {name} \n")
        print(f"{domain} - {name}")
# Run the scrape only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()