# Gainsight/CustomerNotes/Anthology/extract_domain_mapping.py

import csv
import pandas as pd
import re

# Input file that is scanned for "<<<" / ">>>" marker lines.
INPUT_PATH = './Workflows_js_nodes.js'
# CSV output: one row per title, one column per extracted domain.
OUTPUT_PATH = '~/Downloads/test-anthodomains.csv'

# Matches a quote followed by "@", a run of lowercase letters/dots, a literal
# dot and exactly three lowercase letters (e.g. '@example.com).  The dot
# before the three-letter suffix is escaped on purpose: unescaped it matches
# any character, so strings without a dot such as '@notadomain were reported
# as domains.
DOMAIN_PATTERN = re.compile(r"'@[a-z.]+\.[a-z]{3}")


def find_marker_lines(lines):
    """Return the 1-based numbers of all lines containing "<<<" or ">>>".

    A line containing both markers is listed twice, so it forms a complete
    (start, end) pair on its own.  The result is in ascending order.
    """
    marker_lines = []
    for num, line in enumerate(lines, 1):
        if "<<<" in line:
            marker_lines.append(num)
        if ">>>" in line:
            marker_lines.append(num)
    return marker_lines


def pair_markers(marker_lines):
    """Group consecutive marker line numbers into (start, end) tuples.

    A trailing marker without a partner cannot delimit a segment; it is
    skipped and a warning is emitted instead of being dropped silently.
    """
    if len(marker_lines) % 2:
        import warnings
        warnings.warn(
            "unpaired marker on line %d ignored" % marker_lines[-1]
        )
    return list(zip(marker_lines[0::2], marker_lines[1::2]))


def extract_domains(lines, start, end):
    """Return ``(title, domains)`` for the segment delimited by a marker pair.

    ``start`` and ``end`` are 1-based line numbers of the two marker lines.
    The title is the line directly above the start marker with its first
    four characters and its last character (the newline) removed.  The
    segment runs from the start marker line up to, but not including, the
    end marker line; at most one domain (the first match) is taken per line,
    with the leading quote stripped.
    """
    if start >= 2:
        title = lines[start - 2][4:-1]
    else:
        # No line exists above a marker on line 1; index -1 would silently
        # pick up the *last* line of the file as the title.
        title = ''
    domains = []
    for line in lines[start - 1:end - 1]:
        match = DOMAIN_PATTERN.search(line)
        if match:
            domains.append(match.group()[1:])
    return title, domains


def build_domain_mapping(lines):
    """Map each segment title to the list of domains found in that segment.

    Segments sharing the same title overwrite each other; the last one wins.
    """
    mapping = {}
    for start, end in pair_markers(find_marker_lines(lines)):
        title, domains = extract_domains(lines, start, end)
        mapping[title] = domains
    return mapping


def main():
    """Read INPUT_PATH, extract the title -> domains mapping, write the CSV."""
    # Read the file once instead of re-reading it for every marker pair.
    with open(INPUT_PATH, 'r') as file:
        lines = file.readlines()
    mapping = build_domain_mapping(lines)
    # orient='index' turns every title into a row; shorter lists are padded.
    df = pd.DataFrame.from_dict(mapping, orient='index')
    df.to_csv(OUTPUT_PATH)


if __name__ == "__main__":
    main()