Walmart snafu with too much data. Some notes and other small changes.

This commit is contained in:
Norm Rasmussen
2023-02-07 18:07:54 -05:00
parent b2548e676a
commit f5fee9b499
4 changed files with 76 additions and 20 deletions

View File

@ -49,7 +49,7 @@ function findRow(course, avgProgress, lastColumn) {
}
}
writeSheet.getRange(numRows, 1).setValue("Average Progress across all Courses");
writeSheet.getRange(numRows, lastColumn+1).setValue(avgProgress(progArray));
writeSheet.getRange(numRows, lastColumn+1).setValue(averageProgress(progArray));
}
function percentDiff(prevVal, avgProgress){
@ -71,7 +71,7 @@ function percentDiff(prevVal, avgProgress){
return percentChange
}
function avgProgress(progArray) {
/**
 * Compute the arithmetic mean of a list of progress values.
 *
 * @param {number[]} progArray - Progress values to average.
 * @return {number} The mean of progArray, or 0 when progArray is empty.
 */
function averageProgress(progArray) {
  // An empty list would divide by zero and yield NaN; report 0 instead.
  if (progArray.length === 0) {
    return 0;
  }
  // Return the number itself (not a function that computes it) so the
  // result can be written straight into a sheet cell by the caller.
  var total = progArray.reduce((a, b) => a + b, 0);
  return total / progArray.length;
}

View File

@ -1,6 +1,7 @@
import os
from datetime import date
import glob
import re
import shutil
import pandas as pd
@ -12,26 +13,31 @@ basefile = "Walmart_Weekly_Base.xlsx"
def copytemplate(rootdir, basefile):
segments = ["Group1", "Group2", "Group3", "Group4", "Group5"]
today = date.today()
today = today.strftime("%m.%d.%Y")
template = rootdir + basefile
# todayFile = f"Walmart-{today}.xlsx"
todayFile = "Walmart-Chris-Template.xlsx"
listfiles = glob.glob(downloadir + "WeeklyMCA*.csv")
if os.path.exists(rootdir):
shutil.copy2(template, rootdir + todayFile)
# dirfiles = os.listdir(rootdir)
# print(dirfiles)
# currentDash = rootdir + f"Walmart-{today}.xlsx"
currentDash = rootdir + "Walmart-Chris-Template.xlsx"
# print(currentDash)
findlatestExport(currentDash)
def findlatestExport(currentDash):
listfiles = glob.glob(downloadir + "*.csv")
latestdownload = max(listfiles, key=os.path.getctime)
# print(latestdownload)
copytoDash(latestdownload, currentDash)
for segment in segments:
todayFile = f"Walmart-{segment}-{today}.xlsx"
shutil.copy2(template, rootdir + todayFile)
currentDash = rootdir + f"Walmart-{segment}-{today}.xlsx"
for latestdownload in listfiles:
if re.search('(?:Group1)', currentDash) and re.search('(?:Group1)', latestdownload):
copytoDash(latestdownload, currentDash)
elif re.search('(?:Group2)', currentDash) and re.search('(?:Group2)', latestdownload):
copytoDash(latestdownload, currentDash)
elif re.search('(?:Group3)', currentDash) and re.search('(?:Group3)', latestdownload):
copytoDash(latestdownload, currentDash)
elif re.search('(?:Group4)', currentDash) and re.search('(?:Group4)', latestdownload):
copytoDash(latestdownload, currentDash)
elif re.search('(?:Group5)', currentDash) and re.search('(?:Group5)', latestdownload):
copytoDash(latestdownload, currentDash)
else:
print("No matching Looker Pulls or Templates Found!")
# TODO: Replace this if/elif chain with a single check that uses the loop's current segment instead of hard-coding each group.
def copytoDash(latestdownload, currentDash):
@ -76,3 +82,15 @@ def cleanitUp(currentDash):
if __name__ == "__main__":
copytemplate(rootdir, basefile)
# TODO: The dataset is too large. Move the math from the Excel workbook into this script so that the dataset only has to hold the final figures, which is a much smaller set. The math, as far as I can tell, is described in the notes below.
"""
Math:
Courses:
Enrolled: For each unique course name, count the "Enrolled" (Col. C) fields that are not null.
Started: For each unique course name, count the "Attempt Start" (Col. D) fields that are not null.
Completed: For each unique course name, count the "Progress" (Col. G) fields that equal 100%.
Activity Completions:
Started first activity: same as the "Started" count under Courses above.
Activity >= 1 Completions: (TODO: calculation not yet worked out)
"""