48 lines
1.2 KiB
Python
48 lines
1.2 KiB
Python
"""Export the '@domain' tokens found between marker lines of a JS listing.

The script scans ``./Workflows_js_nodes.js`` for lines containing ``<<<`` or
``>>>``, pairs those marker lines up in file order (1st with 2nd, 3rd with
4th, ...), and for every pair collects the regex matches found between the
two markers.  The matches are stored under a title taken from the line just
before the first marker of the pair, and the resulting table is written to
``~/Downloads/test-anthodomains.csv`` with one row per title.
"""
import csv
import re

import pandas as pd

# Listing that is scanned for marker lines.
SOURCE_PATH = './Workflows_js_nodes.js'
# Destination of the exported table.
OUTPUT_PATH = '~/Downloads/test-anthodomains.csv'

# A quote, an "@", then a domain-like token.
# NOTE(review): inside the character class "|" is a literal pipe, and the "."
# in front of the last three letters is unescaped, so it matches any
# character.  The pattern is kept byte-for-byte so existing matches do not
# change -- confirm whether r"'@[a-z.]+\.[a-z]{3}" was the intent.
DOMAIN_PATTERN = re.compile(r"(?:'@[a-z|.]+.[a-z]{3})")

# Read the file once; marker detection and segment slicing share these lines.
with open(SOURCE_PATH, 'r') as file:
    lines = file.readlines()

# 1-based numbers of every marker line, in file order (hence already sorted).
# A line holding both markers is recorded twice, so it pairs with itself and
# produces an empty segment.
LINELIST = []
for num, line in enumerate(lines, 1):
    if "<<<" in line:
        LINELIST.append(num)
    if ">>>" in line:
        LINELIST.append(num)

# Consecutive markers form (first, second) pairs.  zip() stops at the shorter
# slice, so a trailing unpaired marker is dropped.
LISTTUPLE = list(zip(LINELIST[::2], LINELIST[1::2]))

# title -> list of matches found in that title's segment.  A repeated title
# overwrites the earlier entry.
DOMAIN_DICT = {}
for first_marker, second_marker in LISTTUPLE:
    # Marker numbers are 1-based, so the line preceding the first marker sits
    # at index first_marker - 2.  Without the guard, a marker on line 1 would
    # index -1 and silently pick up the last line of the file as the title.
    if first_marker >= 2:
        # Drop the first four characters and the final one (the newline).
        title = lines[first_marker - 2][4:-1]
    else:
        title = ''

    # From the first marker line (inclusive) up to, but not including, the
    # second marker line.
    segment = lines[first_marker - 1:second_marker - 1]

    res_list = []
    for line in segment:
        result = DOMAIN_PATTERN.search(line)
        if result:
            # Strip the leading quote so the entry starts at "@".
            res_list.append(result.group()[1:])
    DOMAIN_DICT[title] = res_list

# One row per title; rows with fewer matches are padded with empty cells.
df = pd.DataFrame.from_dict(DOMAIN_DICT, orient='index')
df.to_csv(OUTPUT_PATH)
|