Finished the class and liquid variable extraction script.
This commit is contained in:
@ -11,7 +11,7 @@ import re
|
|||||||
# MASDICT = {'file': "", 'attributes' : { 'id': [], 'class': [] } }
|
# MASDICT = {'file': "", 'attributes' : { 'id': [], 'class': [] } }
|
||||||
pp = pprint.PrettyPrinter(indent=4)
|
pp = pprint.PrettyPrinter(indent=4)
|
||||||
MASDICT = {}
|
MASDICT = {}
|
||||||
MASLIST = []
|
MASLIST = {}
|
||||||
PARS = HTMLParser
|
PARS = HTMLParser
|
||||||
|
|
||||||
|
|
||||||
@ -45,7 +45,7 @@ def main():
|
|||||||
ilst = [node["id"] for node in soup.find_all() if node.has_attr("id")]
|
ilst = [node["id"] for node in soup.find_all() if node.has_attr("id")]
|
||||||
flatid = flatten(ilst)
|
flatid = flatten(ilst)
|
||||||
|
|
||||||
MASLIST.append({strpfile: {"includes": inclist,"liquid": quidlist, "class": flatclass, "id": flatid}})
|
MASLIST.update({strpfile: {"includes": inclist,"liquid": quidlist, "class": flatclass, "id": flatid}})
|
||||||
MASDICT = {"data": MASLIST}
|
MASDICT = {"data": MASLIST}
|
||||||
# pp.pprint(MASDICT)
|
# pp.pprint(MASDICT)
|
||||||
to_pandas(MASDICT)
|
to_pandas(MASDICT)
|
||||||
@ -65,7 +65,10 @@ def flatten(xs):
|
|||||||
|
|
||||||
def to_pandas(obj):
|
def to_pandas(obj):
|
||||||
# df = pd.json_normalize(obj)
|
# df = pd.json_normalize(obj)
|
||||||
df = pd.json_normalize(obj['data'])
|
# df = pd.DataFrame(obj['data'])
|
||||||
|
results = obj['data']
|
||||||
|
df = pd.DataFrame.from_dict(results, orient='index')
|
||||||
|
df = df.reset_index().rename(columns={"index": "filename"})
|
||||||
print(df)
|
print(df)
|
||||||
df.to_csv('/Users/normrasmussen/Downloads/example_liquid.csv')
|
df.to_csv('/Users/normrasmussen/Downloads/example_liquid.csv')
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user