"""Inventory the ``id`` and ``class`` attribute values used in Liquid templates.

Every ``*.html.liquid`` file below the template directory is scanned line by
line. One record per file is appended to ``MASLIST`` and the records are then
flattened into the DataFrame ``df`` with the columns ``file``,
``attributes.id`` and ``attributes.class``.
"""
import re
from html.parser import HTMLParser
from pathlib import Path

import pandas as pd

# Shape of one record in MASLIST:
#   {'file': "<file name>", 'attributes': {'id': [...], 'class': [...]}}
MASLIST = []

# Not used by the scan below; kept so the module-level name stays available.
PARS = HTMLParser

# `[^"]*` stops at the attribute's own closing quote; the previous greedy
# `.*` ran on to the last quote of the line and swallowed later attributes.
# The look-behind rejects names that merely end in the attribute name, such
# as `data-id=`.
# Limitation: only double-quoted values are recognised, and a value that
# itself contains a double quote is cut off at that quote.
CLASS_PATTERN = re.compile(r'(?<![\w-])class="([^"]*)"')
ID_PATTERN = re.compile(r'(?<![\w-])id="([^"]*)"')


def extract_attributes(lines):
    """Return the ``id`` and ``class`` attribute values found in *lines*.

    Args:
        lines: Iterable of strings, one per source line (an open text file
            works as well as a list).

    Returns:
        A tuple ``(ids, classes)`` of two lists holding the raw attribute
        values in the order they were encountered. A class value is kept as
        one string even when it names several space-separated classes.
    """
    ids = []
    classes = []
    for line in lines:
        # Both attributes are collected from every line, and every occurrence
        # on the line is kept. Previously a line carrying both `class=` and
        # `id=` contributed nothing, and only the first match was recorded.
        classes.extend(CLASS_PATTERN.findall(line))
        ids.extend(ID_PATTERN.findall(line))
    return ids, classes


def scan_template(path):
    """Build the attribute record for the template file at *path*.

    Args:
        path: Location of the template, as a ``str`` or ``pathlib.Path``.

    Returns:
        A dict ``{'file': <file name>, 'attributes': {'id': [...],
        'class': [...]}}``.
    """
    # The context manager closes the handle; the original never closed it.
    with open(path, 'r', encoding='utf-8') as handle:
        ids, classes = extract_attributes(handle)
    return {
        # Path.name is the last path component on every platform, unlike
        # splitting the string on '/'.
        'file': Path(path).name,
        'attributes': {
            'id': ids,
            'class': classes,
        },
    }


p = Path('/Users/normrasmussen/Documents/Work/Custom_Templates/current_templates_2-15-2024/')
# '**' makes the search recursive through all sub-directories.
x = list(p.glob('**/*.html.liquid'))

for posfile in x:
    MASLIST.append(scan_template(posfile))

df = pd.json_normalize(MASLIST)
print(df)

# NOTE(review): earlier commented-out experiments here tried to split/explode
# the list columns so that each value gets its own row (for example via
# DataFrame.explode on 'attributes.class'). That step is not implemented.