Mizuno and Aiim templates. Almost done with the default class and liquid extraction script.
This commit is contained in:
@ -1,64 +1,73 @@
|
||||
import re
|
||||
from html.parser import HTMLParser
|
||||
import json
|
||||
import pandas as pd
|
||||
from pathlib import Path
|
||||
# import pandas as pd
|
||||
from bs4 import BeautifulSoup
|
||||
import pprint
|
||||
import re
|
||||
|
||||
# from collections import Iterable
|
||||
|
||||
# MASDICT = {'file': "", 'attributes' : { 'id': [], 'class': [] } }
|
||||
# Pretty-printer kept for ad-hoc debugging of the collected records.
pp = pprint.PrettyPrinter(indent=4)

# Module-level accumulators. main() appends one record per template file to
# MASLIST.
MASDICT = {}
MASLIST = []

# Alias of the stdlib parser class; it is only bound here, never instantiated.
PARS = HTMLParser

# NOTE(review): this scan runs at import time and overlaps with what main()
# does -- presumably left over from an earlier revision; confirm before
# removing it.
p = Path('/Users/normrasmussen/Documents/Work/Custom_Templates/current_templates_2-15-2024/')
x = list(p.glob('**/*.html.liquid'))

for posfile in x:
    file = str(posfile)

    # Read the template once and close the handle straight away. The earlier
    # form opened every file twice and never closed either handle.
    with open(file, 'r', encoding='utf-8') as htmlfile:
        code = htmlfile.readlines()

    # Raw source lines that contain a Liquid include tag.
    inclst = [include for include in code if '{% include' in include]
    print(inclst)

    # Parse the same text that was read above (no second open needed).
    soup = BeautifulSoup(''.join(code), 'html.parser')

    # One entry per tag carrying the attribute. BeautifulSoup returns "class"
    # as a list of names and "id" as a plain string.
    clst = [node['class'] for node in soup.find_all() if node.has_attr('class')]
    ilst = [node['id'] for node in soup.find_all() if node.has_attr('id')]
|
||||
# print(ilst)
|
||||
|
||||
# ids = []
|
||||
# classes = []
|
||||
# if "class=" in lines:
|
||||
# tmpline = lines
|
||||
# if "id=" not in tmpline:
|
||||
# try:
|
||||
# htmlclass = re.search('class="(.*)"', tmpline)
|
||||
# classes.append(htmlclass.group(1))
|
||||
# except AttributeError as t:
|
||||
# print("Class: ", t)
|
||||
# pass
|
||||
# finally:
|
||||
# pass
|
||||
# elif "id=" in lines:
|
||||
# try:
|
||||
# id = re.search('id="(.*)"', lines)
|
||||
# ids.append(id.group(1))
|
||||
# except AttributeError as t:
|
||||
# print("ID: ", t)
|
||||
# pass
|
||||
# finally:
|
||||
# pass
|
||||
# else:
|
||||
# pass
|
||||
# seconddict = { 'file': finfile, 'attributes': { 'id' : ids, 'class': classes } }
|
||||
# MASLIST.append(seconddict)
|
||||
#
|
||||
# df = pd.json_normalize(MASLIST)
|
||||
# print(df)
|
||||
# kp = (df.set_index('file').apply(lambda x: x.str.split(",").explode()).reset_index())
|
||||
# ksp = df.set_index('file').apply(lambda x: df['attributes.class']([x.split(',') ]))
|
||||
# print(ksp)
|
||||
# kp2 = (df.set_index('file').apply(lambda x: x.str.split(" ").explode('attributes.class')).reset_index())
|
||||
# print(kp2)
|
||||
def main(
    template_root="/Users/normrasmussen/Documents/Work/Custom_Templates/current_templates_2-15-2024/",
):
    """Scan every ``*.html.liquid`` template and export what it references.

    For each template found (recursively) under ``template_root`` this
    collects:

    * ``includes`` -- names referenced by ``{% include ... %}`` tags,
    * ``liquid``   -- every ``{{ ... }}`` output expression,
    * ``class``    -- CSS class names containing ``"np"``,
    * ``id``       -- element ids.

    One record per file is appended to the module-level ``MASLIST``, keyed by
    the first path component below ``template_root``. The accumulated list is
    then handed to ``to_pandas`` wrapped as ``{"data": MASLIST}``.

    Args:
        template_root: Directory to scan. Defaults to the original hard-coded
            location, so calling ``main()`` with no arguments works as before.
    """
    root = Path(template_root)

    # Non-greedy so several outputs on one line stay separate; the greedy
    # form merged "{{ a }} ... {{ b }}" into a single bogus match.
    liquid_output = re.compile(r"{{.*?}}")
    # Captures the include target with or without quotes and without a
    # trailing comma, wherever the tag sits on the line.
    include_tag = re.compile(r"""{%-?\s*include\s+["']?([^\s"',%}]+)""")

    for posfile in root.glob("**/*.html.liquid"):
        # First component below the root. Equivalent to the old
        # ``str(posfile).split("/")[7]`` for the default root, but it does
        # not depend on how deep the root sits in the filesystem.
        strpfile = posfile.relative_to(root).parts[0]

        # Read once and close promptly (the file used to be opened twice and
        # neither handle was ever closed).
        with open(posfile, "r", encoding="utf-8") as handle:
            code = handle.readlines()

        quidlist = [hit for line in code for hit in liquid_output.findall(line)]
        inclist = [
            hit.group(1) for line in code for hit in include_tag.finditer(line)
        ]

        soup = BeautifulSoup("".join(code), "html.parser")

        # "class" is multi-valued in BeautifulSoup (a list per tag), so the
        # per-tag lists are flattened before filtering.
        clst = [node["class"] for node in soup.find_all() if node.has_attr("class")]
        flatclass = [name for name in flatten(clst) if "np" in name]

        ilst = [node["id"] for node in soup.find_all() if node.has_attr("id")]
        flatid = flatten(ilst)

        MASLIST.append(
            {
                strpfile: {
                    "includes": inclist,
                    "liquid": quidlist,
                    "class": flatclass,
                    "id": flatid,
                }
            }
        )

    # Local wrapper only; the module-level MASDICT is left untouched, exactly
    # as with the previous local assignment.
    payload = {"data": MASLIST}
    to_pandas(payload)
|
||||
|
||||
|
||||
def flatten(xs):
    """Return a new list holding the non-list leaves of ``xs`` in order.

    Only ``list`` instances are descended into; any other value (tuples and
    strings included) is kept as a single element.
    """

    def _leaves(items):
        # Depth-first, left-to-right walk over arbitrarily nested lists.
        for entry in items:
            if isinstance(entry, list):
                yield from _leaves(entry)
            else:
                yield entry

    return list(_leaves(xs))
|
||||
|
||||
def to_pandas(obj, csv_path='/Users/normrasmussen/Downloads/example_liquid.csv'):
    """Normalize the collected records into a DataFrame and save it as CSV.

    Args:
        obj: Mapping whose ``'data'`` entry is the list of (possibly nested)
            record dicts to pass to ``pandas.json_normalize``.
        csv_path: Destination of the CSV export. Defaults to the original
            hard-coded location, so existing one-argument calls are unchanged.

    Returns:
        The normalized DataFrame (previously nothing was returned).
    """
    df = pd.json_normalize(obj['data'])
    print(df)
    df.to_csv(csv_path)
    return df
|
||||
|
||||
# Run the extraction only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
|
||||
Reference in New Issue
Block a user