Walmart snafu with too much data. Some notes and other small changes.

2023-02-07 18:07:54 -05:00
parent b2548e676a
commit f5fee9b499
4 changed files with 76 additions and 20 deletions
--- a/Scripts/GoogleScripts/Artera_wowprog/Code.js
+++ b/Scripts/GoogleScripts/Artera_wowprog/Code.js
@ -49,7 +49,7 @@ function findRow(course, avgProgress, lastColumn) {
    }
  }
  writeSheet.getRange(numRows, 1).setValue("Average Progress across all Courses");
-  writeSheet.getRange(numRows, lastColumn+1).setValue(avgProgress(progArray));
+  writeSheet.getRange(numRows, lastColumn+1).setValue(averageProgress(progArray));
 }

 function percentDiff(prevVal, avgProgress){
@ -71,7 +71,7 @@ function percentDiff(prevVal, avgProgress){
  return percentChange
 }

-function avgProgress(progArray) {
+function averageProgress(progArray) {
  var progCalc = progArray => progArray.reduce((a,b) => a + b, 0) / progArray.length
  return progCalc
 }
--- a/Scripts/WalmartExcel/walmart.py
+++ b/Scripts/WalmartExcel/walmart.py
@ -1,6 +1,7 @@
 import os
 from datetime import date
 import glob
+import re
 import shutil
 import pandas as pd

@ -12,26 +13,31 @@ basefile = "Walmart_Weekly_Base.xlsx"


 def copytemplate(rootdir, basefile):
+    segments = ["Group1", "Group2", "Group3", "Group4", "Group5"]
    today = date.today()
    today = today.strftime("%m.%d.%Y")
    template = rootdir + basefile
-    # todayFile = f"Walmart-{today}.xlsx"
-    todayFile = "Walmart-Chris-Template.xlsx"
+    listfiles = glob.glob(downloadir + "WeeklyMCA*.csv")
+
    if os.path.exists(rootdir):
-        shutil.copy2(template, rootdir + todayFile)
-        # dirfiles = os.listdir(rootdir)
-        # print(dirfiles)
-        # currentDash = rootdir + f"Walmart-{today}.xlsx"
-        currentDash = rootdir + "Walmart-Chris-Template.xlsx"
-        # print(currentDash)
-        findlatestExport(currentDash)
-
-
-def findlatestExport(currentDash):
-    listfiles = glob.glob(downloadir + "*.csv")
-    latestdownload = max(listfiles, key=os.path.getctime)
-    # print(latestdownload)
-    copytoDash(latestdownload, currentDash)
+        for segment in segments:
+            todayFile = f"Walmart-{segment}-{today}.xlsx"
+            shutil.copy2(template, rootdir + todayFile)
+            currentDash = rootdir + f"Walmart-{segment}-{today}.xlsx"
+            for latestdownload in listfiles:
+                if re.search('(?:Group1)', currentDash) and re.search('(?:Group1)', latestdownload):
+                    copytoDash(latestdownload, currentDash)
+                elif re.search('(?:Group2)', currentDash) and re.search('(?:Group2)', latestdownload):
+                    copytoDash(latestdownload, currentDash)
+                elif re.search('(?:Group3)', currentDash) and re.search('(?:Group3)', latestdownload):
+                    copytoDash(latestdownload, currentDash)
+                elif re.search('(?:Group4)', currentDash) and re.search('(?:Group4)', latestdownload):
+                    copytoDash(latestdownload, currentDash)
+                elif re.search('(?:Group5)', currentDash) and re.search('(?:Group5)', latestdownload):
+                    copytoDash(latestdownload, currentDash)
+                else:
+                    print("No matching Looker Pulls or Templates Found!")
+# TODO: Replace the per-group if/elif chain above with a single check that matches each download against the current segment


 def copytoDash(latestdownload, currentDash):
@ -76,3 +82,15 @@ def cleanitUp(currentDash):

 if __name__ == "__main__":
    copytemplate(rootdir, basefile)
+
+# TODO: The dataset is too large. Move the math from the Excel workbook into this script so that the dashboard references only the final, much smaller computed dataset. The math, as far as I can tell, is described in the notes below.
+"""
+Math:
+    Courses:
+        Enrolled: For each unique course name, count the rows whose "Enrolled" (Col. C) field is not null.
+        Started: For each unique course name, count the rows whose "Attempt Start" (Col. D) field is not null.
+        Completed: For each unique course name, count the rows whose "Progress" (Col. G) field equals 100%.
+    Activity Completions:
+        Started first activity == Started number above in Course
+        Activity >= 1 Completions:
+"""