From 6f06b14ece2d10a3c04b27b228ebc5b7728139bd Mon Sep 17 00:00:00 2001 From: Norm Rasmussen Date: Thu, 17 Oct 2024 16:22:21 -0400 Subject: [PATCH] Finished the class and liquid variable extraction script. --- Scripts/all_default_class_names.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Scripts/all_default_class_names.py b/Scripts/all_default_class_names.py index f04b9e2d..9afeff36 100644 --- a/Scripts/all_default_class_names.py +++ b/Scripts/all_default_class_names.py @@ -11,7 +11,7 @@ import re # MASDICT = {'file': "", 'attributes' : { 'id': [], 'class': [] } } pp = pprint.PrettyPrinter(indent=4) MASDICT = {} -MASLIST = [] +MASLIST = {} PARS = HTMLParser @@ -45,7 +45,7 @@ def main(): ilst = [node["id"] for node in soup.find_all() if node.has_attr("id")] flatid = flatten(ilst) - MASLIST.append({strpfile: {"includes": inclist,"liquid": quidlist, "class": flatclass, "id": flatid}}) + MASLIST.update({strpfile: {"includes": inclist,"liquid": quidlist, "class": flatclass, "id": flatid}}) MASDICT = {"data": MASLIST} # pp.pprint(MASDICT) to_pandas(MASDICT) @@ -65,7 +65,10 @@ def flatten(xs): def to_pandas(obj): # df = pd.json_normalize(obj) - df = pd.json_normalize(obj['data']) + # df = pd.DataFrame(obj['data']) + results = obj['data'] + df = pd.DataFrame.from_dict(results, orient='index') + df = df.reset_index().rename(columns={"index": "filename"}) print(df) df.to_csv('/Users/normrasmussen/Downloads/example_liquid.csv')