Gainsight/Scripts/all_default_class_names.py

from html.parser import HTMLParser
import json
import pandas as pd
from pathlib import Path
from bs4 import BeautifulSoup
import pprint
import re

# from collections import Iterable

# Earlier sketch of a per-file result shape, kept for reference only:
# MASDICT = {'file': "", 'attributes' : { 'id': [], 'class': [] } }
# Pretty-printer for ad-hoc debugging; no active code in this file calls it.
pp = pprint.PrettyPrinter(indent=4)
# Module-level placeholder. main() has no `global` statement, so it never
# writes to this module-level name.
MASDICT = {}
# Per-template results, filled in place by main(). Despite the name this is a
# dict: key = template name, value = dict with "includes", "liquid", "class"
# and "id" lists.
MASLIST = {}
# Reference to the HTMLParser class itself (not an instance).
PARS = HTMLParser


def main():
    """Scan the template tree and export what each template references.

    Every ``*.html.liquid`` file below the hard-coded templates root is read
    once. For each file the following are collected and stored in the
    module-level ``MASLIST`` dict:

    * ``includes`` - the first token after each ``{% include`` tag, with
      double quotes removed.
    * ``liquid``   - one ``{{ ... }}`` match per line that contains one. The
      pattern is greedy, so a line holding several outputs yields a single
      match spanning from the first ``{{`` to the last ``}}``.
    * ``class``    - every HTML class value containing the substring ``"np"``.
    * ``id``       - every HTML id value.

    The collected data is then handed to ``to_pandas`` as ``{"data": MASLIST}``.

    NOTE(review): results are keyed by the first path component below the
    root, so templates that share that component overwrite one another.
    Confirm this matches the layout of the templates directory.
    """
    p = Path(
        "/Users/normrasmussen/Documents/Work/Custom_Templates/current_templates_2-15-2024/"
    )
    # Compiled once instead of being rebuilt (twice) for every line.
    liquid_output = re.compile('{{(.*)}}')
    include_tag = "{% include"

    for posfile in p.glob("**/*.html.liquid"):
        # First component below the root. Equivalent to the former
        # str(path).split("/")[7], but independent of the root's depth.
        strpfile = posfile.relative_to(p).parts[0]

        # Read the file once and close it deterministically; the same lines
        # feed both the line-based scans and the HTML parser.
        with open(posfile, "r", encoding="utf-8") as handle:
            code = handle.readlines()

        quidlist = []
        for line in code:
            found = liquid_output.search(line)
            if found:
                quidlist.append(found.group(0))

        inclist = []
        for line in code:
            if include_tag not in line:
                continue
            # Take the token that follows the tag itself, so leading markup
            # or extra spaces on the line cannot shift the index, and a tag
            # with nothing after it is skipped rather than raising IndexError.
            tokens = line.partition(include_tag)[2].split()
            if tokens:
                inclist.append(tokens[0].replace('"', ''))

        soup = BeautifulSoup("".join(code), "html.parser")
        nodes = soup.find_all()
        clst = [node["class"] for node in nodes if node.has_attr("class")]
        flatclass = [it for it in flatten(clst) if "np" in it]
        ilst = [node["id"] for node in nodes if node.has_attr("id")]
        flatid = flatten(ilst)

        MASLIST[strpfile] = {
            "includes": inclist,
            "liquid": quidlist,
            "class": flatclass,
            "id": flatid,
        }

    # Built after the loop so it exists even when no template matched
    # (previously an UnboundLocalError in that case).
    to_pandas({"data": MASLIST})


def flatten(xs):
    """Return a new flat list with the items of ``xs`` in depth-first order.

    Only ``list`` instances are expanded, at any nesting depth; every other
    value (strings, tuples, ...) is copied through unchanged.
    """
    result = []
    # Stack of iterators: the top one is the list currently being walked.
    pending = [iter(xs)]
    while pending:
        for item in pending[-1]:
            if isinstance(item, list):
                # Descend into the nested list; the outer iterator keeps its
                # position and resumes once the inner one is exhausted.
                pending.append(iter(item))
                break
            result.append(item)
        else:
            # Current iterator is exhausted: go back up one level.
            pending.pop()
    return result

def to_pandas(obj, csv_path='/Users/normrasmussen/Downloads/example_liquid.csv'):
    """Print the collected results as a table and write them to a CSV file.

    Args:
        obj: Mapping with a ``"data"`` key whose value maps each template
            name to a dict of columns (as built by ``main``).
        csv_path: Destination of the CSV file. Defaults to the location the
            script has always written to, so existing callers are unaffected.

    The template names become a ``filename`` column; the DataFrame's integer
    index is written to the CSV as well.
    """
    results = obj['data']
    # orient='index': outer keys become rows, inner keys become columns.
    df = pd.DataFrame.from_dict(results, orient='index')
    df = df.reset_index().rename(columns={"index": "filename"})
    print(df)
    df.to_csv(csv_path)

# Run the extraction only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
Started a script to grab all class names from default templates. 2024-10-11 16:50:08 -04:00			`from html.parser import HTMLParser`
Mizuno and Aiim templates. Almost done with the default class and liquid extraction script. 2024-10-16 17:15:05 -04:00			`import json`
			`import pandas as pd`
Started a script to grab all class names from default templates. 2024-10-11 16:50:08 -04:00			`from pathlib import Path`
Skuid templates were changed on a demo. Williams-Sonoma improvements. Script was updated slightly. 2024-10-15 17:02:33 -04:00			`from bs4 import BeautifulSoup`
Mizuno and Aiim templates. Almost done with the default class and liquid extraction script. 2024-10-16 17:15:05 -04:00			`import pprint`
			`import re`

			`# from collections import Iterable`
Started a script to grab all class names from default templates. 2024-10-11 16:50:08 -04:00
			`# MASDICT = {'file': "", 'attributes' : { 'id': [], 'class': [] } }`
Mizuno and Aiim templates. Almost done with the default class and liquid extraction script. 2024-10-16 17:15:05 -04:00			`pp = pprint.PrettyPrinter(indent=4)`
			`MASDICT = {}`
Finished the class and liquid variable extraction script. 2024-10-17 16:22:21 -04:00			`MASLIST = {}`
Started a script to grab all class names from default templates. 2024-10-11 16:50:08 -04:00			`PARS = HTMLParser`

Skuid templates were changed on a demo. Williams-Sonoma improvements. Script was updated slightly. 2024-10-15 17:02:33 -04:00
Mizuno and Aiim templates. Almost done with the default class and liquid extraction script. 2024-10-16 17:15:05 -04:00			`def main():`
			`p = Path(`
			`"/Users/normrasmussen/Documents/Work/Custom_Templates/current_templates_2-15-2024/"`
			`)`
			`x = list(p.glob("**/*.html.liquid"))`
			`for posfile in x:`
			`file = str(posfile)`
			`strpfile = file.split("/")[7]`
			`htmlfile = open(file, "r", encoding="utf-8")`
			`file2 = open(file, "r", encoding="utf-8")`
			`code = file2.readlines()`
			`liq = [liquid for liquid in code if re.search('{{(.*)}}', liquid)]`
			`quidlist = []`
			`for quid in liq:`
			`liqcludes = quid.strip()`
			`liqclude = re.search('{{(.*)}}', liqcludes)`
			`quidlist.append(liqclude.group(0))`
			`inclst = [include for include in code if "{% include" in include]`
			`inclist = []`
			`for item in inclst:`
			`includes = item.strip().split(" ")[2].replace('"','')`
			`inclist.append(includes)`

			`soup = BeautifulSoup(htmlfile, "html.parser")`
			`clst = [node["class"] for node in soup.find_all() if node.has_attr("class")]`
			`flatclass = flatten(clst)`
			`flatclass = [it for it in flatclass if "np" in it]`
			`ilst = [node["id"] for node in soup.find_all() if node.has_attr("id")]`
			`flatid = flatten(ilst)`

Finished the class and liquid variable extraction script. 2024-10-17 16:22:21 -04:00			`MASLIST.update({strpfile: {"includes": inclist,"liquid": quidlist, "class": flatclass, "id": flatid}})`
Mizuno and Aiim templates. Almost done with the default class and liquid extraction script. 2024-10-16 17:15:05 -04:00			`MASDICT = {"data": MASLIST}`
			`# pp.pprint(MASDICT)`
			`to_pandas(MASDICT)`


			`def flatten(xs):`
			`# Initialize list for this layer`
			`flat_list = []`
			`for x in xs:`
			`# If it's a list, recurse down and return the interior list`
			`if isinstance(x, list):`
			`flat_list += flatten(x)`
			`# Otherwise, add to this layer's list`
			`else:`
			`flat_list.append(x)`
			`return flat_list`

			`def to_pandas(obj):`
			`# df = pd.json_normalize(obj)`
Finished the class and liquid variable extraction script. 2024-10-17 16:22:21 -04:00			`# df = pd.DataFrame(obj['data'])`
			`results = obj['data']`
			`df = pd.DataFrame.from_dict(results, orient='index')`
			`df = df.reset_index().rename(columns={"index": "filename"})`
Mizuno and Aiim templates. Almost done with the default class and liquid extraction script. 2024-10-16 17:15:05 -04:00			`print(df)`
			`df.to_csv('/Users/normrasmussen/Downloads/example_liquid.csv')`
Started a script to grab all class names from default templates. 2024-10-11 16:50:08 -04:00
Mizuno and Aiim templates. Almost done with the default class and liquid extraction script. 2024-10-16 17:15:05 -04:00			`if __name__ == "__main__":`
			`main()`