Neovim Search and Replace commands moved to final stages, no longer draft. Added some shell-ideas for later writing.
This commit is contained in:
83
content/posts/parsing_files_with_python.md
Normal file
83
content/posts/parsing_files_with_python.md
Normal file
@ -0,0 +1,83 @@
|
||||
---
|
||||
title: 'Using Python to Parse File Contents'
|
||||
date: 2023-11-02T13:57:07-04:00
|
||||
tags: [""]
|
||||
author: "Me"
|
||||
showToc: true
|
||||
TocOpen: false
|
||||
draft: true
|
||||
hidemeta: false
|
||||
description: "I often find myself with various files that need to be parsed and transferred to a CSV. This is how I use Python to parse a long and convoluted file."
|
||||
# disableHLJS: true  # commented out: this key is set again further down, and a YAML mapping must not repeat a key
|
||||
disableShare: false
|
||||
disableHLJS: false
|
||||
hideSummary: false
|
||||
searchHidden: true
|
||||
ShowReadingTime: true
|
||||
ShowBreadCrumbs: true
|
||||
ShowPostNavLinks: true
|
||||
ShowWordCount: true
|
||||
ShowRssButtonInSectionTermList: true
|
||||
UseHugoToc: true
|
||||
cover:
|
||||
image: ""
|
||||
alt: ""
|
||||
caption: ""
|
||||
relative: false
|
||||
hidden: true
|
||||
---
|
||||
|
||||
|
||||
### Full Script
|
||||
|
||||
```python
|
||||
import csv
|
||||
import pandas as pd
|
||||
import re
|
||||
|
||||
# Substrings that mark the opening and closing line of each segment to scan.
SEGMENT_OPEN = "<<<"
SEGMENT_CLOSE = ">>>"

# Matches a single-quoted e-mail domain such as '@example.com and captures it
# without the leading quote.  The dot before the TLD is escaped so that a
# string with no dot at all (e.g. '@notadomain) is no longer accepted, and the
# stray "|" has been removed from the character class (inside [...] it is a
# literal pipe, not alternation).
DOMAIN_PATTERN = re.compile(r"'(@[a-z.]+\.[a-z]{2,})")

SOURCE_PATH = './Workflows_js_nodes.js'
EXPORT_PATH = '~/export_file.csv'


def _find_marker_lines(lines):
    """Return the 1-based numbers of every line containing a segment marker.

    A line that contains both markers is listed twice, which later yields an
    empty segment for it.
    """
    marker_lines = []
    for num, line in enumerate(lines, 1):
        if SEGMENT_OPEN in line:
            marker_lines.append(num)
        if SEGMENT_CLOSE in line:
            marker_lines.append(num)
    return marker_lines


def _pair_markers(marker_lines):
    """Group marker line numbers into consecutive (start, end) pairs.

    zip() stops at the shorter slice, so a trailing unpaired marker is
    dropped instead of raising.
    """
    return list(zip(marker_lines[::2], marker_lines[1::2]))


def collect_domains(lines):
    """Map each segment's title to the list of domains found inside it.

    Args:
        lines: The lines of the source file, each still ending in its
            newline (as returned by ``file.readlines()``).

    Returns:
        A dict whose keys are segment titles and whose values are lists of
        matched domains (``"@example.com"``) in the order they appear.  If
        two segments share a title, the later one replaces the earlier one.
    """
    domains_by_title = {}
    for start, end in _pair_markers(_find_marker_lines(lines)):
        if start >= 2:
            # The title is the line directly above the opening marker, minus
            # its first four characters and its trailing newline.
            # NOTE(review): the four dropped characters are presumably a
            # fixed prefix in the input file -- confirm against the data.
            title = lines[start - 2][4:].rstrip("\n")
        else:
            # The opening marker is the first line, so there is no title line
            # above it.  Without this guard the index would be -1 and the
            # title would silently come from the last line of the file.
            title = f"segment_at_line_{start}"

        # The slice includes the opening marker line and stops just before
        # the closing marker line.
        found = []
        for line in lines[start - 1:end - 1]:
            match = DOMAIN_PATTERN.search(line)
            if match:
                found.append(match.group(1))
        domains_by_title[title] = found
    return domains_by_title


def main(source_path=SOURCE_PATH, export_path=EXPORT_PATH):
    """Read the source file once and export the collected domains as CSV.

    Args:
        source_path: File to scan for marker-delimited segments.
        export_path: Destination CSV; one row per title, one column per
            domain position.
    """
    with open(source_path, 'r') as file:
        lines = file.readlines()

    domains_by_title = collect_domains(lines)

    # orient='index' turns each dict key into a row label; rows with fewer
    # domains than the longest row are padded with missing values.
    df = pd.DataFrame.from_dict(domains_by_title, orient='index')
    df.to_csv(export_path)


if __name__ == "__main__":
    main()
|
||||
|
||||
```
|
||||
Reference in New Issue
Block a user