Files
Gainsight/Scripts/API_Tests/manual_lps.py

60 lines
2.2 KiB
Python
Raw Permalink Normal View History

import glob
import os
import re
from collections import Counter

import pandas as pd
# Path to a single base export CSV. Not referenced by the code in this file.
basecsv = "/Users/normrasmussen/Documents/Northpass/Scripts/Skuid_LPs/Skuid_MCA125.csv"
# Glob pattern selecting the per-learning-path export CSVs that manualFunc scans.
directory = "/Users/normrasmussen/Documents/Northpass/Scripts/Skuid_LPs/LPCSVs/*.csv"
# CSV listing learning paths and their courses; manualFunc reads its
# "Learning Path" and "Course Name" columns.
lpcsv = "/Users/normrasmussen/Documents/Northpass/Scripts/Skuid_LPs/skuidlps.csv"
# NOTE: this approach needs one exported CSV per learning path/MCA export,
# which is tedious to produce by hand.
def manualFunc(lpcsv, directory, output=None):
    """Report, per export file, the learners who completed a whole learning path.

    The learning-path CSV is grouped by "Learning Path" to count the distinct
    "Course Name" values each path contains. Every export file matched by
    ``directory`` is then paired with the learning path that carries the same
    two-digit number (first two-digit run in the export's *file name* versus
    first two-digit run in the learning-path name). A learner is listed for a
    file when their number of distinct courses in that file equals the
    course count of the paired learning path.

    Args:
        lpcsv: Path to a CSV with "Learning Path" and "Course Name" columns.
        directory: Glob pattern selecting the export CSVs. Each export needs
            "Email", "Learner Full Name" and "Course Name" columns.
        output: Destination CSV for the report. Defaults to the location this
            script has always written to.

    Returns:
        The report as a DataFrame: one column per matched export file (keyed
        by its path), holding ``(email, full name)`` tuples.
    """
    if output is None:
        # Historical default destination, kept so existing callers are unaffected.
        output = "/Users/normrasmussen/Documents/Northpass/Scripts/Skuid_LPs/finaltest3.csv"

    two_digits = re.compile(r"\d{2}")

    lps = pd.read_csv(lpcsv)
    coursenums = lps.groupby(["Learning Path"])["Course Name"].nunique().to_dict()

    # Map each learning-path number to the number of courses in that path.
    # Computed once here instead of once per (file, learning path) pair.
    # If several learning paths share a number, the last one wins, which is
    # what the original nested loop did as well.
    required_by_number = {}
    for lp, course_count in coursenums.items():
        found = two_digits.search(str(lp))
        if found is None:
            # A learning path without a two-digit number cannot be paired
            # with any file; skip it rather than abort the whole run.
            continue
        required_by_number[found.group()] = course_count

    lp_dict = {}
    # Sorted so the report's column order does not depend on filesystem order.
    for fname in sorted(glob.glob(directory)):
        # Look only at the file name: digits in a parent directory must not
        # be mistaken for the learning-path number.
        found = two_digits.search(os.path.basename(fname))
        if found is None or found.group() not in required_by_number:
            continue
        required = required_by_number[found.group()]

        readData = pd.read_csv(fname)
        completions = readData.groupby(["Email", "Learner Full Name"])[
            "Course Name"
        ].nunique()
        lp_dict[fname] = [
            person for person, done in completions.items() if done == required
        ]

    # Columns may have different lengths; wrapping each list in a Series lets
    # pandas pad the shorter ones with NaN. dtype=object keeps the tuples
    # as-is and avoids an inferred dtype for empty lists.
    final = pd.DataFrame(
        {key: pd.Series(value, dtype=object) for key, value in lp_dict.items()}
    )
    final.to_csv(output)
    return final
# Script entry point: build the report only when this file is executed
# directly, using the module-level input paths.
if __name__ == "__main__":
    manualFunc(lpcsv=lpcsv, directory=directory)