"""Report learner e-mail addresses that are one edit away from each other.

The script reads the "Email" column of a learner export, compares every pair
of distinct addresses with the Levenshtein edit distance, and writes the
pairs whose distance is exactly 1 to a CSV with the columns ``Email1`` and
``Email2`` (one row per pair).
"""
import csv
import itertools
import sys

import pandas as pd

try:
    from Levenshtein import distance as lev
except ImportError:

    def lev(s1, s2):
        """Return the Levenshtein edit distance between two strings.

        Pure-Python stand-in used only when the ``Levenshtein`` package is
        not installed; unit cost for insert, delete and substitute.
        """
        if len(s1) < len(s2):
            s1, s2 = s2, s1
        previous = list(range(len(s2) + 1))
        for i, char1 in enumerate(s1, start=1):
            current = [i]
            for j, char2 in enumerate(s2, start=1):
                current.append(
                    min(
                        previous[j] + 1,  # deletion
                        current[j - 1] + 1,  # insertion
                        previous[j - 1] + (char1 != char2),  # substitution
                    )
                )
            previous = current
        return previous[-1]


peopleCsv = "/Users/normrasmussen/Downloads/TalkspaceAllLearners.csv"
dupesCsv = "/Users/normrasmussen/Downloads/TalkspaceDupes_singlechange.csv"


def _nearDuplicatePairs(emails, maxDistance=1):
    """Return ``(email1, email2)`` tuples whose edit distance is 1..maxDistance.

    Values that are not non-empty strings (for example the NaN pandas yields
    for a blank cell) are skipped, and repeated addresses are collapsed so
    the same pair is never reported twice. Pairs come back in
    ``itertools.combinations`` order over the first occurrences.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order.
    unique = list(dict.fromkeys(e for e in emails if isinstance(e, str) and e))
    pairs = []
    for first, second in itertools.combinations(unique, 2):
        # The edit distance is never smaller than the length difference, so
        # this cheap check skips most of the expensive comparisons.
        if abs(len(first) - len(second)) > maxDistance:
            continue
        if 0 < lev(first, second) <= maxDistance:
            pairs.append((first, second))
    return pairs


def readCsv(peopleCsv, outputCsv=None):
    """Read the learner export and write its near-duplicate e-mail pairs.

    Args:
        peopleCsv: Path of a CSV that has "Learner Full Name" and "Email"
            columns.
        outputCsv: Where to write the result; defaults to ``dupesCsv``.

    Returns:
        The DataFrame of pairs that was written.
    """
    readExport = pd.read_csv(
        peopleCsv,
        usecols=["Learner Full Name", "Email"],
        skipinitialspace=True,
    )
    people = readExport["Email"].tolist()
    return startCompare(peopleCsv, people, readExport, outputCsv)


def startCompare(peopleCsv, people, readExport, outputCsv=None):
    """Find addresses in *people* that differ by one edit and write them out.

    Args:
        peopleCsv: Path of the source export (passed through unchanged).
        people: Iterable of e-mail addresses; missing values are ignored.
        readExport: DataFrame read from the export (passed through unchanged).
        outputCsv: Where to write the result; defaults to ``dupesCsv``.

    Returns:
        The DataFrame of pairs that was written.
    """
    pairs = _nearDuplicatePairs(people)
    email1 = [first for first, _ in pairs]
    email2 = [second for _, second in pairs]
    return writenewColumn(email1, email2, peopleCsv, readExport, outputCsv)


def writenewColumn(email1, email2, peopleCsv, readExport, outputCsv=None):
    """Write the matched addresses as a two-column CSV, one row per pair.

    The pairs get a table of their own instead of being attached to
    *readExport*: a pair has no relationship to the learner row that shares
    its position, and column assignment would silently drop every pair past
    the last learner row.

    Args:
        email1: First address of each pair.
        email2: Second address of each pair, parallel to *email1*.
        peopleCsv: Unused; kept so existing callers keep working.
        readExport: Unused; kept so existing callers keep working.
        outputCsv: Where to write the result; defaults to ``dupesCsv``.

    Returns:
        The DataFrame of pairs that was written.
    """
    pairs = pd.DataFrame(
        {
            "Email1": pd.Series(list(email1), dtype=object),
            "Email2": pd.Series(list(email2), dtype=object),
        }
    )
    # drop_duplicates returns a new frame, so the result must be kept. Only
    # the full (Email1, Email2) pair is de-duplicated: one address may
    # legitimately be close to several others.
    pairs = pairs.drop_duplicates(subset=["Email1", "Email2"]).reset_index(
        drop=True
    )
    print(pairs)
    pairs.to_csv(dupesCsv if outputCsv is None else outputCsv)
    return pairs


if __name__ == "__main__":
    readCsv(peopleCsv)