Mizuno and Aiim templates. Almost done with the default class and liquid extraction script.

This commit is contained in:
Norm Rasmussen
2024-10-16 17:15:05 -04:00
parent b3b6b4afea
commit 5f27d1bf02
200 changed files with 5797 additions and 87 deletions

View File

@@ -1,64 +1,73 @@
import re
from html.parser import HTMLParser
import json
import pandas as pd
from pathlib import Path
# import pandas as pd
from bs4 import BeautifulSoup
import pprint
import re
# NOTE(review): this span comes from a rendered diff with leading indentation
# stripped, so which statements sit inside the `for` loops below cannot be
# determined from this view. The comments describe each statement on its own.
# from collections import Iterable
# MASDICT = {'file': "", 'attributes' : { 'id': [], 'class': [] } }
# Pretty-printer configured with a 4-space indent.
pp = pprint.PrettyPrinter(indent=4)
# Module-level containers; main() appends one record per template to MASLIST.
MASDICT = {}
MASLIST = []
# Alias for the HTMLParser class itself (not an instance).
PARS = HTMLParser
# Root directory that is searched for Liquid templates.
p = Path('/Users/normrasmussen/Documents/Work/Custom_Templates/current_templates_2-15-2024/')
# Recursively collect every file whose name ends in ".html.liquid".
x = list(p.glob('**/*.html.liquid'))
for posfile in x:
file = str(posfile)
# The same file is opened twice: `htmlfile` is handed to BeautifulSoup and
# `file2` is read line by line. Neither handle is closed in the visible code.
htmlfile = open(file, 'r', encoding='utf-8')
file2 = open(file, 'r', encoding='utf-8')
# finfile = file.split('/')[-1]
code = file2.readlines()
# Keep the raw lines that contain a Liquid include tag.
inclst = [include for include in code if '{% include' in include]
print(inclst)
# This loop has no effect: the only branch body is `pass`.
for lines in code:
if '{% include' in lines:
pass
# Parse the markup with the stdlib-backed "html.parser" builder.
soup = BeautifulSoup(htmlfile, 'html.parser')
# `class` attribute value of every tag that has one.
clst = [node['class'] for node in soup.find_all() if node.has_attr('class')]
# print(clst)
# `id` attribute value of every tag that has one.
ilst = [node['id'] for node in soup.find_all() if node.has_attr('id')]
# print(ilst)
# ids = []
# classes = []
# if "class=" in lines:
# tmpline = lines
# if "id=" not in tmpline:
# try:
# htmlclass = re.search('class="(.*)"', tmpline)
# classes.append(htmlclass.group(1))
# except AttributeError as t:
# print("Class: ", t)
# pass
# finally:
# pass
# elif "id=" in lines:
# try:
# id = re.search('id="(.*)"', lines)
# ids.append(id.group(1))
# except AttributeError as t:
# print("ID: ", t)
# pass
# finally:
# pass
# else:
# pass
# seconddict = { 'file': finfile, 'attributes': { 'id' : ids, 'class': classes } }
# MASLIST.append(seconddict)
#
# df = pd.json_normalize(MASLIST)
# print(df)
# kp = (df.set_index('file').apply(lambda x: x.str.split(",").explode()).reset_index())
# ksp = df.set_index('file').apply(lambda x: df['attributes.class']([x.split(',') ]))
# print(ksp)
# kp2 = (df.set_index('file').apply(lambda x: x.str.split(" ").explode('attributes.class')).reset_index())
# print(kp2)
# Default directory tree scanned by main().
_DEFAULT_TEMPLATE_ROOT = (
    "/Users/normrasmussen/Documents/Work/Custom_Templates/current_templates_2-15-2024/"
)

# One Liquid output expression such as ``{{ product.title }}``. Non-greedy so
# that two expressions on the same line are reported as two separate entries
# instead of one span running from the first ``{{`` to the last ``}}``.
_LIQUID_OUTPUT_RE = re.compile(r"\{\{.*?\}\}")

# The target of an ``{% include ... %}`` tag, with optional surrounding quotes.
_INCLUDE_RE = re.compile(r"\{%-?\s*include\s+[\"']?([^\"'\s,%]+)")


def _liquid_outputs(text):
    """Return every ``{{ ... }}`` expression found in *text*, in order."""
    return _LIQUID_OUTPUT_RE.findall(text)


def _include_targets(text):
    """Return the name referenced by each ``{% include %}`` tag in *text*."""
    return _INCLUDE_RE.findall(text)


def main(template_root=_DEFAULT_TEMPLATE_ROOT):
    """Scan Liquid templates and export their includes, outputs, classes and ids.

    Every ``*.html.liquid`` file below *template_root* is read once. For each
    file a record is appended to the module-level ``MASLIST`` in the form
    ``{<top-level folder>: {"includes", "liquid", "class", "id"}}``. After all
    files are processed the collected records are passed to ``to_pandas``.

    Args:
        template_root: Directory to search recursively. Defaults to the
            original hard-coded template folder.

    Note:
        ``MASLIST`` is a module global, so calling ``main`` more than once in
        the same process accumulates records from every call.
    """
    root = Path(template_root)
    for template in root.glob("**/*.html.liquid"):
        # First path component below the root. For the default root this is
        # the same value the previous ``str(path).split("/")[7]`` produced,
        # but it no longer depends on how deep the root directory is.
        project = template.relative_to(root).parts[0]

        # Read the file once; the text feeds both the regex scans and the
        # HTML parser, and no file handle is left open.
        text = template.read_text(encoding="utf-8")

        soup = BeautifulSoup(text, "html.parser")
        class_values = [
            node["class"] for node in soup.find_all() if node.has_attr("class")
        ]
        id_values = [node["id"] for node in soup.find_all() if node.has_attr("id")]

        # Only class names containing the substring "np" are kept.
        # NOTE(review): presumably a project-specific naming prefix - confirm.
        np_classes = [name for name in flatten(class_values) if "np" in name]

        MASLIST.append(
            {
                project: {
                    "includes": _include_targets(text),
                    "liquid": _liquid_outputs(text),
                    "class": np_classes,
                    "id": flatten(id_values),
                }
            }
        )

    # Build the frame and write the CSV once, after every file is collected.
    to_pandas({"data": MASLIST})
def flatten(xs):
    """Return a flat list of the non-list items in *xs*, in original order.

    Nested ``list`` objects are expanded to any depth. Every other value,
    including strings and tuples, is treated as a single item and appended
    unchanged.

    The traversal is iterative, using an explicit stack of iterators, so
    deeply nested input cannot hit Python's recursion limit.

    Args:
        xs: An iterable whose items may themselves be (nested) lists.

    Returns:
        A new list containing the leaf items in left-to-right order.
    """
    flat_list = []
    pending = [iter(xs)]
    while pending:
        # Resume the innermost iterator where it left off.
        for item in pending[-1]:
            if isinstance(item, list):
                # Descend into the nested list before continuing this level.
                pending.append(iter(item))
                break
            flat_list.append(item)
        else:
            # The innermost iterator is exhausted; return to its parent.
            pending.pop()
    return flat_list
def to_pandas(obj, out_path="/Users/normrasmussen/Downloads/example_liquid.csv"):
    """Normalize the collected records into a DataFrame, print it and save it.

    Args:
        obj: Mapping with a ``"data"`` key holding a list of (nested) dict
            records. Nested keys become dotted column names through
            ``pandas.json_normalize``.
        out_path: Destination of the CSV file. Defaults to the original
            hard-coded location, so existing callers are unaffected.

    Raises:
        KeyError: If *obj* has no ``"data"`` key.
    """
    df = pd.json_normalize(obj["data"])
    print(df)
    df.to_csv(out_path)
# Run the extraction only when this file is executed as a script.
if __name__ == "__main__":
main()