2024-10-11 16:50:08 -04:00
|
|
|
import re
|
|
|
|
|
from html.parser import HTMLParser
|
|
|
|
|
from pathlib import Path
|
2024-10-15 17:02:33 -04:00
|
|
|
# import pandas as pd
|
|
|
|
|
from bs4 import BeautifulSoup
|
2024-10-11 16:50:08 -04:00
|
|
|
|
|
|
|
|
# MASDICT = {'file': "", 'attributes' : { 'id': [], 'class': [] } }
|
|
|
|
|
# Accumulator for per-file results. Nothing in the active code below appends
# to it; only the disabled legacy code further down refers to it.
MASLIST = []

# Alias of the stdlib HTMLParser *class* (not an instance). It is not used by
# the active code in this section.
PARS = HTMLParser

# Root folder that is searched recursively for Liquid HTML templates.
p = Path('/Users/normrasmussen/Documents/Work/Custom_Templates/current_templates_2-15-2024/')

# Every '*.html.liquid' file at any depth below the root.
x = list(p.glob('**/*.html.liquid'))

for posfile in x:
    file = str(posfile)

    # Read the template once and release the handle immediately. The previous
    # version opened every file twice and never closed either handle.
    with open(file, 'r', encoding='utf-8') as htmlfile:
        code = htmlfile.readlines()

    # finfile = file.split('/')[-1]

    # Lines that contain a Liquid include tag; printed once per template
    # (an empty list is printed when the template has none).
    inclst = [include for include in code if '{% include' in include]
    print(inclst)

    # Parse the markup once per file from the text that was already read,
    # instead of from a second open handle. The old per-line loop only did
    # `pass` on include lines, so it has been dropped.
    soup = BeautifulSoup(''.join(code), 'html.parser')

    # One entry per tag that carries the attribute, in document order.
    clst = [node['class'] for node in soup.find_all() if node.has_attr('class')]
    # print(clst)

    ilst = [node['id'] for node in soup.find_all() if node.has_attr('id')]
    # print(ilst)
|
|
|
|
|
# print(ilst)
|
|
|
|
|
|
|
|
|
|
# NOTE(review): disabled legacy regex/pandas approach, kept for reference.
# The original indentation was lost; the nesting below is a reconstruction
# and must be confirmed before any of this is re-enabled.
# ids = []
# classes = []
# if "class=" in lines:
#     tmpline = lines
#     if "id=" not in tmpline:
#         try:
#             htmlclass = re.search('class="(.*)"', tmpline)
#             classes.append(htmlclass.group(1))
#         except AttributeError as t:
#             print("Class: ", t)
#             pass
#         finally:
#             pass
# elif "id=" in lines:
#     try:
#         id = re.search('id="(.*)"', lines)
#         ids.append(id.group(1))
#     except AttributeError as t:
#         print("ID: ", t)
#         pass
#     finally:
#         pass
# else:
#     pass
# seconddict = { 'file': finfile, 'attributes': { 'id' : ids, 'class': classes } }
# MASLIST.append(seconddict)
#
# df = pd.json_normalize(MASLIST)
# print(df)
# kp = (df.set_index('file').apply(lambda x: x.str.split(",").explode()).reset_index())
# ksp = df.set_index('file').apply(lambda x: df['attributes.class']([x.split(',') ]))
# print(ksp)
# kp2 = (df.set_index('file').apply(lambda x: x.str.split(" ").explode('attributes.class')).reset_index())
# print(kp2)
|
|
|
|
|
|