Deleted a bunch of files and cleaned things up. Started the script for finding correct domain names for Anthology. Recast templates. Todolist.

This commit is contained in:
Norm Rasmussen
2024-03-08 19:18:46 -05:00
parent 3490b97063
commit a3455a720c
111 changed files with 3321 additions and 1639 deletions

View File

@ -0,0 +1,8 @@
import pandas as pd
import requests
# Path of the CSV export this script inspects.
MASTER = "~/Downloads/Anthology-Master-CSV-FirstChanges.csv"

# Load the sheet, then print every (index, row) pair that iterrows() yields.
df = pd.read_csv(MASTER)
for index, record in df.iterrows():
    print((index, record))

View File

@ -0,0 +1,26 @@
import csv
import pandas as pd
def grab_vals(
    source="/Users/normrasmussen/Downloads/antho_domains.csv",
    destination="/Users/normrasmussen/Downloads/final_domains.csv",
):
    """Collect the unique cells of a CSV file and save them as "@value" rows.

    Every non-blank cell of ``source`` is gathered (surrounding whitespace is
    ignored), duplicates are dropped, each remaining value is prefixed with
    "@", and the result is written to ``destination`` as a one-column CSV
    using pandas' default index and header.

    The number of cells found and the number of unique values are printed,
    in that order.

    Args:
        source: Path of the CSV file to read.
        destination: Path of the CSV file to write.
    """
    cells = []
    # The csv module asks for newline="" so that newlines embedded in quoted
    # fields are handed to the reader untouched.
    with open(source, "r", newline="") as handle:
        for row in csv.reader(handle):
            for cell in row:
                cell = cell.strip()
                # Skip empty and whitespace-only cells so they never turn
                # into a bare "@" entry.
                if cell:
                    cells.append(cell)
    print(len(cells))

    # Sorting makes the output order reproducible; iterating a set of strings
    # can yield a different order on every run.
    unique_cells = sorted(set(cells))
    print(len(unique_cells))

    final = ["@" + domain for domain in unique_cells]
    pd.DataFrame(final).to_csv(destination)


if __name__ == "__main__":
    grab_vals()

View File

@ -0,0 +1,4 @@
domain, group_1, ,
newanthology.com, Anthology 101 - (T1), Anthology 101 - (T2),
sometest.com, Anthology 101 - (T1), Anthology 101 - (T4),
tc.columbia.edu, Anthology 101 - (T2), ,
1 domain group_1
2 newanthology.com Anthology 101 - (T1) Anthology 101 - (T2)
3 sometest.com Anthology 101 - (T1) Anthology 101 - (T4)
4 tc.columbia.edu Anthology 101 - (T2)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,10 @@
from requests_html import HTMLSession
from domains_list import DOMAINS
import sys

# Print the part of each site's <title> that precedes the first "|".
# One session serves every request and is closed when the loop finishes,
# instead of opening a fresh, never-closed session per domain.
with HTMLSession() as session:
    for domain in DOMAINS:
        URL = f"https://{domain}"
        resp = session.get(URL)
        title = resp.html.find('head > title', first=True)
        if title is None:
            # find(..., first=True) returns None when nothing matches;
            # report the domain on stderr and keep going rather than
            # aborting the whole run on the attribute access below.
            print(f"{domain}: no <title> element found", file=sys.stderr)
            continue
        group_name = title.text.split("|")[0]
        print(group_name)

File diff suppressed because it is too large Load Diff