Finished the class and liquid variable extraction script.

This commit is contained in:
Norm Rasmussen
2024-10-17 16:22:21 -04:00
parent 5f27d1bf02
commit 6f06b14ece

View File

@@ -11,7 +11,7 @@ import re
# MASDICT = {'file': "", 'attributes' : { 'id': [], 'class': [] } }
pp = pprint.PrettyPrinter(indent=4)
MASDICT = {}
-MASLIST = []
+MASLIST = {}
PARS = HTMLParser
@@ -45,7 +45,7 @@ def main():
ilst = [node["id"] for node in soup.find_all() if node.has_attr("id")]
flatid = flatten(ilst)
-MASLIST.append({strpfile: {"includes": inclist,"liquid": quidlist, "class": flatclass, "id": flatid}})
+MASLIST.update({strpfile: {"includes": inclist,"liquid": quidlist, "class": flatclass, "id": flatid}})
MASDICT = {"data": MASLIST}
# pp.pprint(MASDICT)
to_pandas(MASDICT)
@@ -65,7 +65,10 @@ def flatten(xs):
def to_pandas(obj):
# df = pd.json_normalize(obj)
df = pd.json_normalize(obj['data'])
# df = pd.DataFrame(obj['data'])
results = obj['data']
df = pd.DataFrame.from_dict(results, orient='index')
df = df.reset_index().rename(columns={"index": "filename"})
print(df)
df.to_csv('/Users/normrasmussen/Downloads/example_liquid.csv')