1.9 KiB
1.9 KiB
title, date, tags, author, showToc, TocOpen, draft, hidemeta, description, disableHLJS, disableShare, disableHLJS, hideSummary, searchHidden, ShowReadingTime, ShowBreadCrumbs, ShowPostNavLinks, ShowWordCount, ShowRssButtonInSectionTermList, UseHugoToc, cover
| title | date | tags | author | showToc | TocOpen | draft | hidemeta | description | disableHLJS | disableShare | disableHLJS | hideSummary | searchHidden | ShowReadingTime | ShowBreadCrumbs | ShowPostNavLinks | ShowWordCount | ShowRssButtonInSectionTermList | UseHugoToc | cover | |||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Using Python to Parse File Contents | 2023-11-02T13:57:07-04:00 | Me | true | false | true | false | I often find myself with various files that need to be parsed and transferred to a CSV. This is how I use Python to parse a long and convoluted file. | true | false | false | false | true | true | true | true | true | true | true |
|
Full Script
import csv
import pandas as pd
import re
# File that is scanned for "<<<" / ">>>" delimited segments.
SOURCE_PATH = './Workflows_js_nodes.js'
# Destination CSV, handed to pandas unchanged.
EXPORT_PATH = '~/export_file.csv'

# Finds a token that starts with '@ inside a line; the leading quote is
# stripped from the match before it is stored.
# NOTE(review): the "." before the final three letters is unescaped, so it
# matches any character, and "|" inside the character class is a literal
# pipe. The pattern is kept exactly as written to preserve current output.
DOMAIN_PATTERN = re.compile(r"(?:'@[a-z|.]+.[a-z]{3})")


def find_marker_lines(lines):
    """Return the 1-based numbers of every line containing "<<<" or ">>>".

    A line that contains both markers is listed twice, once per marker.
    """
    marker_lines = []
    for num, line in enumerate(lines, 1):
        for marker in ("<<<", ">>>"):
            if marker in line:
                marker_lines.append(num)
    return marker_lines


def pair_markers(marker_lines):
    """Group marker line numbers into consecutive (start, end) tuples.

    The numbers are sorted first. When the count is odd, the final
    unpaired marker is dropped.
    """
    ordered = sorted(marker_lines)
    return list(zip(ordered[::2], ordered[1::2]))


def collect_domains(lines, segments):
    """Map each segment's title to the pattern matches found inside it.

    lines: every line of the source file, newline-terminated.
    segments: (start, end) pairs of 1-based marker line numbers.

    The title is taken from the line directly above the start marker, minus
    its first four characters and its trailing newline. The scanned range
    runs from the start marker line up to, but not including, the end marker
    line. At most one match is taken per line. A later segment with the same
    title replaces the earlier entry.
    """
    domains = {}
    for start, end in segments:
        # A start marker on line 1 has no line above it; without this guard
        # the index -1 would silently pick up the last line of the file.
        title = lines[start - 2][4:-1] if start >= 2 else ""
        found = (DOMAIN_PATTERN.search(line) for line in lines[start - 1:end - 1])
        domains[title] = [match.group()[1:] for match in found if match]
    return domains


def main():
    """Read the source file once, extract the segments, and write the CSV."""
    with open(SOURCE_PATH, 'r') as file:
        lines = file.readlines()
    segments = pair_markers(find_marker_lines(lines))
    domain_dict = collect_domains(lines, segments)
    # orient='index' makes each title a row, with its matches spread
    # across the columns.
    df = pd.DataFrame.from_dict(domain_dict, orient='index')
    df.to_csv(EXPORT_PATH)


if __name__ == "__main__":
    main()