from collections import Counter

import pandas as pd

basecsv = "/Users/normrasmussen/Documents/Northpass/Scripts/Skuid_LPs/Skuid_MCA125.csv"
lpcsv = "/Users/normrasmussen/Documents/Northpass/Scripts/Skuid_LPs/skuidlps.csv"

"""
Design notes.

Example multivalue dictionary:

    dict = {key1: [value1, value2, value3, value4],
            key2: [value5, value6, value7],
            }

So this could be used for each learning path. In other words:

    learning_paths = {'01:Skuid Ethos': ["Congratulations", "Create", "Skuid Resources"]}

etc etc

Ideally, we will add Alexa's "levels" in this dictionary as well. Could we do:

    learning_paths = {
        'Level_1': [
            {'01:Skuid Ethos': ["Congratulations", "Create", "Skuid Resources"]},
            {'02:Composer': ["Overview", "Get Started with Composer", "Manage Pages"]},
            {'03:Design System Studio': ["Get Started with Design Systems", etc etc]},
        ],
        'Level_2': [
            {'10 - Data': ["Tips to Optimize", "Smarter Conditions"]},
            {'11-Components': ["Battle", "Engage"]},
        ],
    }

How to create this by automation?
"""


def lpLevels(basecsv, lpcsv):
    """Build the learning-path table from *lpcsv* and match learners against it.

    Args:
        basecsv: Path (or file-like object) of the learner CSV; handed to
            ``mainFunc`` unchanged.
        lpcsv: Path (or file-like object) of the learning-path CSV. It must
            expose "Learning Path" and "Course Name"; its second column is
            used as the row index, as before.

    Returns:
        The mapping produced by ``mainFunc``: learning path -> list of
        learners who have every course of that path.
    """
    levels = pd.read_csv(lpcsv, index_col=1)

    # One entry per learning path holding its distinct course names.
    courses_by_path = levels.groupby("Learning Path")["Course Name"].unique()

    # Wide table: index = learning path, columns 0..n = course names, padded
    # with missing values for shorter paths. Built explicitly so that an empty
    # CSV still yields a DataFrame.
    learningpaths = pd.DataFrame(
        [list(path_courses) for path_courses in courses_by_path],
        index=courses_by_path.index,
    )

    # The former ``learningpaths.rename_axis(index=0)`` call discarded its
    # result and therefore had no effect; it has been removed.
    return mainFunc(basecsv, learningpaths)


def mainFunc(basecsv, learningpaths):
    """Report which learners have taken every course of each learning path.

    Args:
        basecsv: Path (or file-like object) of a CSV with "Learner Full Name"
            and "Course Name" columns.
        learningpaths: DataFrame with one row per learning path (the path name
            is the index label) whose cells are course names; missing cells
            are ignored.

    Returns:
        dict mapping each learning-path name to the list of learner names
        whose courses include every course of that path. A path without any
        course maps to an empty list.
    """
    # Part 1: the set of distinct courses recorded for each learner.
    # NOTE(review): every row is counted as a finished course - confirm the
    # export only contains completions.
    readData = pd.read_csv(basecsv)
    courses_per_learner = (
        readData.groupby("Learner Full Name")["Course Name"].unique()
    )
    taken_by_learner = {
        learner: set(taken) for learner, taken in courses_per_learner.items()
    }

    # Part 2: a learner has finished a path when the path's courses are a
    # subset of the learner's courses. (The previous ``df2.isin(df2)``
    # compared the learner table with itself and never consulted the paths.)
    finished = {}
    for path_name, path_courses in learningpaths.iterrows():
        required = set(path_courses.dropna())
        finished[path_name] = [
            learner
            for learner, taken in taken_by_learner.items()
            if required and required <= taken
        ]

    # Part 3: print the "finished list" per learning path.
    for path_name, learners in finished.items():
        print(f"{path_name}: {learners}")

    return finished


if __name__ == "__main__":
    lpLevels(basecsv, lpcsv)