DV, Walmart, API Tests
This commit is contained in:
@ -1,15 +1,48 @@
|
||||
import csv
|
||||
from Levenshtein import distance as lev
|
||||
import pandas as pd
|
||||
import itertools
|
||||
import sys
|
||||
|
||||
# Path of the learner export that readCsv() loads.
peopleCsv = "/Users/normrasmussen/Downloads/TalkspaceAllLearners.csv"

# Import-time sanity check of the Levenshtein binding: prints the edit
# distance between two user names that differ in their first letter.
x = lev("nrasmussen", "mrasmussen")
print(x)
|
||||
|
||||
#def readSpreadsheet():
|
||||
# with open('talkspace.csv', rb) as csvfile:
|
||||
# for line in csvfile.readlines():
|
||||
# array = line.split(',')
|
||||
# emailcol = array[3]
|
||||
def readCsv(peopleCsv):
    """Load the learner export and start the near-duplicate e-mail scan.

    Only the 'Learner Full Name' and 'Email' columns are read from the CSV.
    The e-mail column is flattened into a plain list and handed, together
    with the loaded frame, to ``startCompare``.

    Args:
        peopleCsv: Path of the CSV export to read.
    """
    export = pd.read_csv(
        peopleCsv,
        skipinitialspace=True,
        usecols=['Learner Full Name', 'Email'],
    )
    addresses = export['Email'].tolist()
    startCompare(peopleCsv, addresses, export)
|
||||
|
||||
#def compareemails():
|
||||
# levenshtein.distance(email1, email2)
|
||||
# itertools combinations
|
||||
def startCompare(peopleCsv, people, readExport):
    """Collect every pair of entries that are exactly one edit apart.

    Each unordered pair drawn from ``people`` is scored with the Levenshtein
    distance; pairs at distance 1 are kept (identical entries and entries
    two or more edits apart are ignored). The two sides of the kept pairs
    are forwarded to ``writenewColumn`` as parallel lists.

    Args:
        people: Sequence of e-mail addresses to compare pairwise.
        peopleCsv: Forwarded unchanged to ``writenewColumn``.
        readExport: Forwarded unchanged to ``writenewColumn``.
    """
    close_pairs = [
        (first, second)
        for first, second in itertools.combinations(people, 2)
        if lev(first, second) == 1
    ]
    email1 = [first for first, _ in close_pairs]
    email2 = [second for _, second in close_pairs]
    writenewColumn(email1, email2, peopleCsv, readExport)
|
||||
|
||||
def writenewColumn(
    email1,
    email2,
    peopleCsv,
    readExport,
    outputCsv='/Users/normrasmussen/Downloads/TalkspaceDupes_singlechange.csv',
):
    """Write the learner export plus the near-duplicate e-mail pairs to CSV.

    ``email1[i]`` and ``email2[i]`` form one pair. The pairs are placed in
    the ``Email1`` / ``Email2`` columns, matched to the export rows by row
    label. Rows that repeat an ``Email1`` value, and then rows that repeat
    an ``Email2`` value, are dropped (first occurrence kept). Missing values
    compare equal to each other, so at most one row without a pair survives.

    Args:
        email1: First address of each near-duplicate pair.
        email2: Second address of each near-duplicate pair.
        peopleCsv: Path of the source export; not used here, kept so
            existing callers keep working.
        readExport: Learner data (a DataFrame or anything ``pd.DataFrame``
            accepts). It is not modified.
        outputCsv: Destination CSV path; defaults to the location the
            script has always written to.
    """
    # Work on a copy so the new columns never leak into the caller's frame.
    df = pd.DataFrame(readExport).copy()
    print(df)

    pairs = pd.DataFrame({
        'Email1': pd.Series(email1, dtype=object),
        'Email2': pd.Series(email2, dtype=object),
    })
    # Outer join on the row index: plain column assignment would silently
    # discard every pair beyond the number of rows in the export.
    df = df.join(pairs, how='outer')

    # After these two passes every (Email1, Email2) combination is already
    # unique, so no further de-duplication on the pair is needed.
    df = df.drop_duplicates('Email1').drop_duplicates('Email2')

    df.to_csv(outputCsv)
|
||||
|
||||
# Run the near-duplicate e-mail scan only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    readCsv(peopleCsv)
|
||||
|
||||
@ -40,9 +40,11 @@ def copytoDash(latestdownload, currentDash):
|
||||
regex="Unname"
|
||||
),axis=1,
|
||||
inplace=True)
|
||||
|
||||
def progressFormat(latestdownload, currentDash, readExport):
    """Add a numeric ``Progress_replace`` column and pass a copy onward.

    The ``Progress`` column has every '%' character removed and is then
    converted with ``pd.to_numeric``; the result is stored on ``readExport``
    itself as ``Progress_replace``. A copy of the updated frame is handed to
    ``bringtoExcel``.

    Args:
        latestdownload: Forwarded unchanged to ``bringtoExcel``.
        currentDash: Forwarded unchanged to ``bringtoExcel``.
        readExport: DataFrame with a string ``Progress`` column
            (presumably values such as '85%' -- confirm against the export).
            It gains the ``Progress_replace`` column in place.
    """
    # regex=False: '%' is stripped as a literal character regardless of the
    # pandas version's default for ``regex``.
    stripped = readExport['Progress'].str.replace('%', '', regex=False)
    readExport['Progress_replace'] = pd.to_numeric(stripped)

    copiedData = readExport.copy()
    bringtoExcel(latestdownload, currentDash, copiedData)
|
||||
#cleanitUp(latestdownload, currentDash, copiedData)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user