"""Scan ``*.html.liquid`` templates under a fixed directory.

For every template found (recursively) the script prints the raw lines that
contain a Liquid ``{% include`` tag, and parses the markup with BeautifulSoup
to gather the ``class`` and ``id`` attribute values of all elements.
"""
import re
from html.parser import HTMLParser
from pathlib import Path

# import pandas as pd
from bs4 import BeautifulSoup

# Accumulator for per-file results; nothing appends to it yet (see the TODO
# at the end of the loop body).
MASLIST = []
# Reference to the stdlib parser class (not an instance); currently unused.
PARS = HTMLParser

# Root directory that is searched recursively for Liquid HTML templates.
p = Path('/Users/normrasmussen/Documents/Work/Custom_Templates/current_templates_2-15-2024/')
x = list(p.glob('**/*.html.liquid'))

for posfile in x:
    file = str(posfile)

    # Read the template once and close the handle deterministically; the
    # same content feeds both the include scan and the HTML parse.
    with open(file, 'r', encoding='utf-8') as htmlfile:
        code = htmlfile.readlines()

    # Raw source lines (trailing newline included) that hold an include tag.
    inclst = [include for include in code if '{% include' in include]
    print(inclst)

    soup = BeautifulSoup(''.join(code), 'html.parser')
    nodes = soup.find_all()  # every tag in the document, walked only once

    # One entry per element that carries the attribute. With the
    # 'html.parser' builder BeautifulSoup returns multi-valued attributes
    # such as ``class`` as a list of tokens, so ``clst`` is a list of lists.
    clst = [node['class'] for node in nodes if node.has_attr('class')]
    ilst = [node['id'] for node in nodes if node.has_attr('id')]

    # TODO: record the results per file, e.g.
    #   MASLIST.append({'file': posfile.name,
    #                   'attributes': {'id': ilst, 'class': clst}})
    # and tabulate them afterwards (an earlier draft used
    # ``pd.json_normalize(MASLIST)`` followed by ``explode``).