from collections import Counter
import glob
import os
import re

import pandas as pd

basecsv = "/Users/normrasmussen/Documents/Northpass/Scripts/Skuid_LPs/Skuid_MCA125.csv"
directory = "/Users/normrasmussen/Documents/Northpass/Scripts/Skuid_LPs/LPCSVs/*.csv"
lpcsv = "/Users/normrasmussen/Documents/Northpass/Scripts/Skuid_LPs/skuidlps.csv"
finalcsv = "/Users/normrasmussen/Documents/Northpass/Scripts/Skuid_LPs/finaltest3.csv"

# NOTE: this workflow needs one CSV export per learning path/MCA, which is
# painful to produce by hand.

# The first run of two consecutive digits identifies a learning path, both in
# an export's file name and in the learning-path name itself.
_TWO_DIGITS = re.compile(r"(\d{2})")


def _first_two_digits(text):
    """Return the first two consecutive digits found in *text*, or None."""
    found = _TWO_DIGITS.search(str(text))
    return found.group(1) if found else None


def manualFunc(lpcsv, directory, output=finalcsv):
    """Write a CSV listing who completed each learning path.

    The number of distinct "Course Name" values per "Learning Path" is read
    from *lpcsv*. Each export file matched by *directory* is paired with the
    learning path that shares its two-digit number, and every
    ("Email", "Learner Full Name") pair whose distinct course count in that
    export equals the learning path's course count is collected.

    Args:
        lpcsv: Path to a CSV with "Learning Path" and "Course Name" columns.
        directory: Glob pattern selecting the per-learning-path export CSVs;
            each needs "Email", "Learner Full Name" and "Course Name" columns.
        output: Destination CSV path. One column per matched export file
            (named by its path), holding the matching learner pairs.

    Returns:
        None. The result is written to *output*.
    """
    lps = pd.read_csv(lpcsv)
    coursenums = lps.groupby(["Learning Path"])["Course Name"].nunique().to_dict()

    # Map each two-digit learning-path number to its required course count
    # once, instead of re-running the regex for every file. Names without a
    # number are skipped; if two names share a number, the later one wins.
    required = {}
    for lp, nums in coursenums.items():
        lp_num = _first_two_digits(lp)
        if lp_num is not None:
            required[lp_num] = nums

    lp_dict = {}
    for fname in glob.glob(directory):
        # Only the file name carries the learning-path number; digits in a
        # parent directory must not be mistaken for it.
        file_num = _first_two_digits(os.path.basename(fname))
        if file_num is None or file_num not in required:
            continue
        nums = required[file_num]

        readData = pd.read_csv(fname)
        peoples = (
            readData.groupby(["Email", "Learner Full Name"])["Course Name"]
            .nunique()
            .to_dict()
        )
        lp_dict[fname] = [
            person for person, completions in peoples.items() if completions == nums
        ]

    # Wrapping each list in a Series lets columns of different lengths share
    # one frame; shorter columns are padded with NaN.
    final = pd.DataFrame({key: pd.Series(value) for key, value in lp_dict.items()})
    final.to_csv(output)


if __name__ == "__main__":
    manualFunc(lpcsv, directory)