Finished the class and liquid variable extraction script.
This commit is contained in:
@ -11,7 +11,7 @@ import re
|
||||
# MASDICT = {'file': "", 'attributes' : { 'id': [], 'class': [] } }
|
||||
# Pretty-printer for debugging dumps of the collected data
# (main() has a commented-out pp.pprint(MASDICT) call).
pp = pprint.PrettyPrinter(indent=4)

# Placeholder; main() builds {"data": MASLIST} under this name before
# passing it to to_pandas().
MASDICT = {}

# Per-file results keyed by file name. This must be a dict (not a list):
# main() adds entries with MASLIST.update({...}) and to_pandas() reads the
# mapping with orient='index'.
MASLIST = {}

# Alias for the HTML parser class.
PARS = HTMLParser
|
||||
|
||||
|
||||
@ -45,7 +45,7 @@ def main():
|
||||
ilst = [node["id"] for node in soup.find_all() if node.has_attr("id")]
|
||||
flatid = flatten(ilst)
|
||||
|
||||
MASLIST.append({strpfile: {"includes": inclist,"liquid": quidlist, "class": flatclass, "id": flatid}})
|
||||
MASLIST.update({strpfile: {"includes": inclist,"liquid": quidlist, "class": flatclass, "id": flatid}})
|
||||
MASDICT = {"data": MASLIST}
|
||||
# pp.pprint(MASDICT)
|
||||
to_pandas(MASDICT)
|
||||
@ -65,7 +65,10 @@ def flatten(xs):
|
||||
|
||||
def to_pandas(obj, csv_path='/Users/normrasmussen/Downloads/example_liquid.csv'):
    """Tabulate the per-file extraction results, print them and save as CSV.

    Args:
        obj: Mapping with a ``'data'`` key whose value maps each file name
            to a dict of extracted fields (``main`` stores ``includes``,
            ``liquid``, ``class`` and ``id`` under each file name).
        csv_path: Destination of the CSV file. Defaults to the location the
            script has always written to, so existing ``to_pandas(obj)``
            calls behave as before.

    Raises:
        KeyError: If ``obj`` has no ``'data'`` entry.
    """
    results = obj['data']
    # One row per file: outer keys become the index, inner keys the columns.
    df = pd.DataFrame.from_dict(results, orient='index')
    # Move the file names out of the index into a regular "filename" column.
    df = df.reset_index().rename(columns={"index": "filename"})
    print(df)
    df.to_csv(csv_path)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user