# File: Gainsight/Scripts/all_default_class_names.py
# Repository viewer listing: 77 lines, 2.4 KiB, Python.

from html.parser import HTMLParser
import json
import pandas as pd
from pathlib import Path
from bs4 import BeautifulSoup
import pprint
import re
# from collections import Iterable
# MASDICT = {'file': "", 'attributes' : { 'id': [], 'class': [] } }
# Pretty-printer kept around for ad-hoc debugging output; no active code in
# the visible part of this file calls it.
pp = pprint.PrettyPrinter(indent=4)
# Module-level placeholder. main() has no `global` declaration for this name,
# so this dict is never written to and stays empty.
MASDICT = {}
# Despite the "LIST" in its name this is a dict: main() stores one entry per
# template key in it, and the accumulated mapping is what gets exported.
MASLIST = {}
# Alias for the HTMLParser class itself (not an instance); not referenced
# anywhere else in the visible code.
PARS = HTMLParser
def main():
    """Scan the template tree and export per-template attributes to CSV.

    Every ``*.html.liquid`` file found recursively under the hard-coded
    template directory is read once and four lists are collected from it:

    * ``includes`` - for each line containing ``{% include``, the third
      space-separated token of the stripped line with double quotes removed;
    * ``liquid``   - for each line containing one, the ``{{ ... }}`` span
      (greedy: from the first ``{{`` to the last ``}}`` on that line);
    * ``class``    - every CSS class value that contains the substring ``np``;
    * ``id``       - every element ``id`` attribute value.

    The lists are stored in the module-level ``MASLIST`` dict, keyed by the
    first path component below the template directory, and the accumulated
    mapping is finally passed to ``to_pandas`` as ``{"data": MASLIST}``.
    """
    root = Path(
        "/Users/normrasmussen/Documents/Work/Custom_Templates/current_templates_2-15-2024/"
    )
    # Compiled once instead of being re-looked-up for every line of every file.
    liquid_output = re.compile('{{(.*)}}')

    for posfile in root.glob("**/*.html.liquid"):
        # First path component below ``root``. This is the value the former
        # ``str(path).split("/")[7]`` produced for this root, but it no longer
        # depends on how deep the root directory happens to be.
        # NOTE(review): files inside the same top-level sub-directory share
        # this key, so a later file overwrites an earlier one - confirm that
        # this is intended.
        strpfile = posfile.relative_to(root).parts[0]

        # Read the file a single time and close it deterministically; the
        # same lines feed both the line-based scans and the HTML parser.
        with open(posfile, "r", encoding="utf-8") as handle:
            code = handle.readlines()
        markup = "".join(code)

        quidlist = []
        inclist = []
        for line in code:
            found = liquid_output.search(line)
            if found:
                quidlist.append(found.group(0))
            if "{% include" in line:
                tokens = line.strip().split(" ")
                # Skip malformed include lines that carry no third token
                # rather than aborting the whole run with an IndexError.
                if len(tokens) > 2:
                    inclist.append(tokens[2].replace('"', ''))

        soup = BeautifulSoup(markup, "html.parser")
        nodes = soup.find_all()
        all_classes = flatten(
            [node["class"] for node in nodes if node.has_attr("class")]
        )
        flatclass = [name for name in all_classes if "np" in name]
        flatid = flatten([node["id"] for node in nodes if node.has_attr("id")])

        MASLIST[strpfile] = {
            "includes": inclist,
            "liquid": quidlist,
            "class": flatclass,
            "id": flatid,
        }

    # Export once, after every template has been processed.
    to_pandas({"data": MASLIST})
def flatten(xs):
    """Return a new flat list with every nested ``list`` in *xs* expanded.

    Only ``list`` instances are descended into; any other element (strings
    and tuples included) is kept as a single item. Element order is preserved.
    """

    def _walk(items):
        # Depth-first, left-to-right traversal of the nested lists.
        for item in items:
            if isinstance(item, list):
                yield from _walk(item)
            else:
                yield item

    return list(_walk(xs))
def to_pandas(obj, csv_path='/Users/normrasmussen/Downloads/example_liquid.csv'):
    """Tabulate the collected template data, print it and write it as CSV.

    Args:
        obj: Mapping with a ``"data"`` key whose value maps each template key
            to a dict of column values. Each outer key becomes one row.
        csv_path: Destination of the CSV file. Defaults to the location this
            script has always written to, so existing callers are unaffected.

    Returns:
        The DataFrame that was written: the outer keys appear in a
        ``filename`` column, followed by one column per inner key.

    Raises:
        KeyError: If *obj* has no ``"data"`` entry.
    """
    results = obj['data']
    # orient='index' makes each outer key a row label; reset_index() then
    # moves those labels into a regular column, which is renamed "filename".
    df = pd.DataFrame.from_dict(results, orient='index')
    df = df.reset_index().rename(columns={"index": "filename"})
    print(df)
    df.to_csv(csv_path)
    return df
# Script entry point: run the scan only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()