84 lines
1.9 KiB
Markdown
84 lines
1.9 KiB
Markdown
|
|
---
|
||
|
|
title: 'Using Python to Parse File Contents'
|
||
|
|
date: 2023-11-02T13:57:07-04:00
|
||
|
|
tags: [""]
|
||
|
|
author: "Me"
|
||
|
|
showToc: true
|
||
|
|
TocOpen: false
|
||
|
|
draft: true
|
||
|
|
hidemeta: false
|
||
|
|
description: "I often find myself with various files that need to be parsed and transferred to a CSV. This is how I use
|
||
|
|
Python to parse a long and convoluted file."
|
||
|
|
disableHLJS: true
|
||
|
|
disableShare: false
|
||
|
|
disableHLJS: false
|
||
|
|
hideSummary: false
|
||
|
|
searchHidden: true
|
||
|
|
ShowReadingTime: true
|
||
|
|
ShowBreadCrumbs: true
|
||
|
|
ShowPostNavLinks: true
|
||
|
|
ShowWordCount: true
|
||
|
|
ShowRssButtonInSectionTermList: true
|
||
|
|
UseHugoToc: true
|
||
|
|
cover:
|
||
|
|
image: ""
|
||
|
|
alt: ""
|
||
|
|
caption: ""
|
||
|
|
relative: false
|
||
|
|
hidden: true
|
||
|
|
---
|
||
|
|
|
||
|
|
|
||
|
|
### Full Script
|
||
|
|
|
||
|
|
```python
|
||
|
|
import csv
|
||
|
|
import pandas as pd
|
||
|
|
import re
|
||
|
|
|
||
|
|
SOURCE_PATH = './Workflows_js_nodes.js'
EXPORT_PATH = '~/export_file.csv'

# NOTE(review): inside the character class `|` is a literal pipe, and the `.`
# in front of the three-letter suffix is unescaped, so it matches any
# character. The pattern is kept exactly as written; tighten it only after
# checking it against the real input file.
DOMAIN_PATTERN = re.compile(r"(?:'@[a-z|.]+.[a-z]{3})")


def find_marker_lines(lines):
    """Return the 1-based numbers of all lines containing ``<<<`` or ``>>>``.

    A line that contains both markers is listed twice, once per marker.
    The result is in ascending order because the lines are scanned from
    first to last.
    """
    marker_lines = []
    for num, line in enumerate(lines, 1):
        if "<<<" in line:
            marker_lines.append(num)
        if ">>>" in line:
            marker_lines.append(num)
    return marker_lines


def pair_markers(marker_lines):
    """Group marker line numbers into consecutive ``(start, end)`` tuples.

    Markers are paired purely by position (1st with 2nd, 3rd with 4th, ...);
    the marker text itself is not inspected. A trailing unpaired marker is
    dropped.
    """
    return list(zip(marker_lines[0::2], marker_lines[1::2]))


def extract_domains(lines, spans):
    """Map each segment's title to the pattern matches found inside it.

    ``lines`` is the full file as a list of strings and ``spans`` holds
    1-based ``(start, end)`` marker line numbers. Each segment runs from the
    start marker line up to, but not including, the end marker line, and at
    most one match is collected per line. A title that occurs more than once
    keeps only the matches of its last segment.
    """
    domains_by_title = {}
    for start, end in spans:
        # The title is taken from the line directly above the start marker,
        # minus its first four characters and its final character (normally
        # the newline). NOTE(review): if the marker is on the first line,
        # this index is -1 and silently reads the last line of the file.
        title = lines[start - 2][4:-1]
        matches = []
        for line in lines[start - 1:end - 1]:
            found = DOMAIN_PATTERN.search(line)
            if found:
                # Drop the leading quote so the value starts at "@".
                matches.append(found.group()[1:])
        domains_by_title[title] = matches
    return domains_by_title


def main(source_path=SOURCE_PATH, export_path=EXPORT_PATH):
    """Parse ``source_path`` and write one CSV row per title to ``export_path``."""
    # Read the file once; every later step works on this in-memory list.
    with open(source_path, 'r') as file:
        lines = file.readlines()

    spans = pair_markers(find_marker_lines(lines))
    domains_by_title = extract_domains(lines, spans)

    # orient='index' makes each title a row label and spreads its matches
    # across the columns.
    df = pd.DataFrame.from_dict(domains_by_title, orient='index')
    df.to_csv(export_path)


if __name__ == "__main__":
    main()
|
||
|
|
|
||
|
|
```
|