"""Report learner e-mail addresses that are one edit apart.

Reads a learner export CSV, compares every pair of values in its ``Email``
column with the Levenshtein distance, and writes the pairs whose distance is
exactly 1 to a report CSV with the columns ``Email1`` and ``Email2``.
"""
import csv
import itertools
import sys

import pandas as pd
from Levenshtein import distance as lev

# Learner export that is read when the file is run as a script.
peopleCsv = "/Users/normrasmussen/Downloads/TalkspaceAllLearners.csv"
# Default destination of the near-duplicate report.
dupesCsv = "/Users/normrasmussen/Downloads/TalkspaceDupes_singlechange.csv"


def readCsv(peopleCsv, outputCsv=dupesCsv):
    """Load the learner export and start the pairwise e-mail comparison.

    Args:
        peopleCsv: Path of the export; it must contain the columns
            ``Learner Full Name`` and ``Email``.
        outputCsv: Path the report is written to. Defaults to ``dupesCsv``.
    """
    readExport = pd.read_csv(
        peopleCsv,
        usecols=['Learner Full Name', 'Email'],
        skipinitialspace=True,
    )
    people = readExport['Email'].tolist()
    startCompare(peopleCsv, people, readExport, outputCsv)


def startCompare(peopleCsv, people, readExport, outputCsv=dupesCsv):
    """Collect every pair of e-mails that is exactly one edit apart.

    Args:
        peopleCsv: Path of the export; passed through to ``writenewColumn``.
        people: The values of the export's ``Email`` column.
        readExport: The loaded export; passed through to ``writenewColumn``.
        outputCsv: Path the report is written to. Defaults to ``dupesCsv``.
    """
    # Blank cells are read by pandas as float NaN; only real strings are
    # meaningful input for an edit-distance comparison.
    emails = [email for email in people if isinstance(email, str)]

    email1 = []
    email2 = []
    for name1, name2 in itertools.combinations(emails, 2):
        # The edit distance is never smaller than the difference in length,
        # so such pairs cannot match and the costlier call is skipped.
        if abs(len(name1) - len(name2)) > 1:
            continue
        if lev(name1, name2) == 1:
            email1.append(name1)
            email2.append(name2)
    writenewColumn(email1, email2, peopleCsv, readExport, outputCsv)


def writenewColumn(email1, email2, peopleCsv, readExport, outputCsv=dupesCsv):
    """Write the matched e-mail pairs to the report CSV.

    The pairs get a table of their own. Attaching them to the learner export
    as extra columns would align them by row number: each pair would sit next
    to an unrelated learner, and pairs beyond the number of learners would be
    lost.

    Args:
        email1: First address of each matched pair.
        email2: Second address of each matched pair, parallel to ``email1``.
        peopleCsv: Unused; kept so existing callers keep working.
        readExport: The loaded export; only echoed to stdout.
        outputCsv: Path the report is written to. Defaults to ``dupesCsv``.
    """
    print(pd.DataFrame(readExport))

    # dtype=object keeps empty inputs from being inferred as a numeric column.
    pairs = pd.DataFrame({
        'Email1': pd.Series(email1, dtype=object),
        'Email2': pd.Series(email2, dtype=object),
    })
    # NOTE: each address is reported at most once per column, so an address
    # that is one edit away from several others appears only with the first.
    pairs = pairs.drop_duplicates('Email1').drop_duplicates('Email2')
    pairs = pairs.reset_index(drop=True)
    pairs.to_csv(outputCsv)


if __name__ == "__main__":
    readCsv(peopleCsv)