---
title: 'Using Python to Parse File Contents'
date: 2023-11-02T13:57:07-04:00
tags: [""]
author: "Me"
showToc: true
TocOpen: false
draft: true
hidemeta: false
description: "I often find myself with various files that need to be parsed and transferred to a CSV. This is how I use python to parse a long and convoluted file."
disableHLJS: true
disableShare: false
# NOTE(review): disableHLJS is set twice in this front matter (true above,
# false here); confirm which value is intended and drop the other.
disableHLJS: false
hideSummary: false
searchHidden: true
ShowReadingTime: true
ShowBreadCrumbs: true
ShowPostNavLinks: true
ShowWordCount: true
ShowRssButtonInSectionTermList: true
UseHugoToc: true
cover:
    image: ""
    alt: ""
    caption: ""
    relative: false
    hidden: true
---

### Full Script

```python
import csv
import re

import pandas as pd

# Input file to scan and CSV file to write.
SOURCE_PATH = './Workflows_js_nodes.js'
EXPORT_PATH = '~/export_file.csv'

# Matches a "'@domain.tld"-style token; the leading quote is stripped later.
# NOTE(review): inside the character class "|" is a literal pipe, and the "."
# before the three-letter suffix is unescaped, so it matches any character.
# The pattern is kept exactly as it was so the exported values do not change;
# confirm against real data before tightening it.
DOMAIN_PATTERN = re.compile(r"(?:'@[a-z|.]+.[a-z]{3})")

# Read the file once; every later step works from this in-memory list instead
# of re-opening the file for each section.
with open(SOURCE_PATH, 'r') as file:
    lines = file.readlines()

# 1-based line numbers of every marker line, in file order. A line that holds
# both markers is recorded twice.
LINELIST = []
for num, line in enumerate(lines, 1):
    if "<<<" in line:
        LINELIST.append(num)
    if ">>>" in line:
        LINELIST.append(num)

# Pair consecutive markers into (start, end) tuples. zip() stops at the
# shorter slice, so an unpaired trailing marker is dropped without relying on
# a swallowed IndexError to end the loop.
LISTTUPLE = list(zip(LINELIST[0::2], LINELIST[1::2]))

# Maps each section title to the list of domains found inside that section.
DOMAIN_DICT = {}
for start, end in LISTTUPLE:
    # The title is taken from the line directly above the start marker, minus
    # its first four characters and its final character.
    # NOTE(review): assumes a start marker never sits on the first line of the
    # file; if it does, index -1 silently reads the last line instead.
    title = lines[start - 2][4:-1]

    # Lines from the start marker up to (not including) the end marker.
    segment = lines[start - 1:end - 1]

    # Keep the first match per line, without its leading quote character.
    DOMAIN_DICT[title] = [
        match.group()[1:]
        for match in map(DOMAIN_PATTERN.search, segment)
        if match
    ]

# One row per title; columns are the positional domains found in that section.
df = pd.DataFrame.from_dict(DOMAIN_DICT, orient='index')
df.to_csv(EXPORT_PATH)
```