Started the reorganization of Scripts for github. Some notes changes.

2023-05-04 16:07:06 -04:00
parent 6a674ca24b
commit 18a266746b
209 changed files with 121 additions and 12181 deletions
--- a/Scripts/Auto_Update_Excel/requirements.txt
+++ b/Scripts/Auto_Update_Excel/requirements.txt
@ -0,0 +1,31 @@
+asgineer==0.8.1
+bcrypt==4.0.0
+certifi==2022.6.15.1
+charset-normalizer==2.1.1
+click==8.1.3
+greenlet==1.1.3
+h11==0.13.0
+idna==3.3
+itemdb==1.1.1
+Jinja2==3.1.2
+Levenshtein==0.20.7
+Markdown==3.4.1
+MarkupSafe==2.1.1
+msgpack==1.0.4
+numpy==1.23.4
+pandas==1.5.1
+pscript==0.7.7
+PyJWT==2.4.0
+pynvim==0.4.3
+python-dateutil==2.8.2
+python-Levenshtein==0.20.7
+pytz==2022.5
+rapidfuzz==2.12.0
+requests==2.28.1
+six==1.16.0
+timetagger==22.9.1
+timetagger-cli==21.5.1
+toml==0.10.2
+urllib3==1.26.12
+uvicorn==0.18.3
+XlsxWriter==3.0.3
--- a/Scripts/Auto_Update_Excel/walmart.py
+++ b/Scripts/Auto_Update_Excel/walmart.py
@ -0,0 +1,96 @@
+import os
+from datetime import date
+import glob
+import re
+import shutil
+import pandas as pd
+
+# Folder that holds the template workbook and receives the dated dashboard copies.
+rootdir = "/Users/normrasmussen/Documents/Resources/Walmart/"
+# Folder that is searched for the "WeeklyMCA*.csv" exports.
+downloadir = (
+    "/Users/normrasmussen/Google Drive/My Drive/Shared with Clients/Walmart_Looker/"
+)
+# File name of the template workbook; it is appended to rootdir.
+basefile = "Walmart_Weekly_Base.xlsx"
+
+
+def copytemplate(rootdir, basefile):
+    """Create today's dashboard workbook for each segment and load its exports.
+
+    For every segment the template ``rootdir/basefile`` is copied to
+    ``Walmart-<segment>-<MM.DD.YYYY>.xlsx`` inside ``rootdir``. Each
+    ``WeeklyMCA*.csv`` file found in the module-level ``downloadir`` whose
+    path contains the segment name is then passed to ``copytoDash`` together
+    with the path of the new workbook.
+
+    Args:
+        rootdir: Directory that holds the template; the dated copies are
+            written here too. Nothing is done if it does not exist.
+        basefile: File name of the template workbook inside ``rootdir``.
+    """
+    segments = ["Group1", "Group2", "Group3", "Group4", "Group5"]
+    today = date.today().strftime("%m.%d.%Y")
+    # os.path.join keeps the paths valid with or without a trailing slash.
+    template = os.path.join(rootdir, basefile)
+    listfiles = glob.glob(os.path.join(downloadir, "WeeklyMCA*.csv"))
+
+    if not os.path.exists(rootdir):
+        return
+
+    for segment in segments:
+        currentDash = os.path.join(rootdir, f"Walmart-{segment}-{today}.xlsx")
+        shutil.copy2(template, currentDash)
+
+        # An export belongs to this workbook when its path names the segment.
+        matches = [path for path in listfiles if segment in path]
+        if not matches:
+            # Report once per segment instead of once per non-matching file.
+            print("No matching Looker Pulls or Templates Found!")
+            continue
+        for latestdownload in matches:
+            copytoDash(latestdownload, currentDash)
+
+
+def copytoDash(latestdownload, currentDash):
+    """Load a CSV export, make ``Progress`` numeric and write it to the workbook.
+
+    Args:
+        latestdownload: Path of the CSV export to read.
+        currentDash: Path of the workbook that ``bringtoExcel`` writes into.
+
+    Raises:
+        KeyError: If the export has no ``Progress`` column.
+        ValueError: If a ``Progress`` value is not numeric once a trailing
+            ``%`` has been removed.
+    """
+    readExport = pd.read_csv(
+        latestdownload,
+        index_col=False,
+        header=0,
+        low_memory=False,
+    )
+    print(readExport)
+    progress = readExport["Progress"]
+    if not pd.api.types.is_numeric_dtype(progress):
+        # Remove only a trailing percent sign; slicing off the last character
+        # would also truncate values that carry no "%" at all.
+        progress = progress.str.rstrip("%")
+    readExport["Progress"] = pd.to_numeric(progress)
+    print(readExport["Progress"])
+    # Discard every column whose label matches "Unname".
+    readExport = readExport.drop(columns=readExport.filter(regex="Unname").columns)
+    bringtoExcel(latestdownload, currentDash, readExport)
+
+
+def bringtoExcel(latestdownload, currentDash, copiedData):
+    """Write ``copiedData`` into the ``Data`` sheet of an existing workbook.
+
+    The workbook is opened in append mode with the openpyxl engine and
+    ``if_sheet_exists="overlay"``.
+
+    Args:
+        latestdownload: Not used here; kept so existing callers keep working.
+        currentDash: Path of the existing ``.xlsx`` workbook to update.
+        copiedData: DataFrame to write; its index is not written.
+    """
+    with pd.ExcelWriter(
+        currentDash,
+        mode="a",
+        engine="openpyxl",
+        if_sheet_exists="overlay",
+    ) as writer:
+        # The writer above already fixes the engine, so to_excel must not
+        # name a different one.
+        copiedData.to_excel(writer, sheet_name="Data", index=False)
+
+
+def cleanitUp(currentDash):
+    """Read the ``Data`` sheet of a workbook and drop its first column.
+
+    Args:
+        currentDash: Path of the workbook to read.
+
+    Returns:
+        The ``Data`` sheet as a DataFrame without its first column.
+    """
+    cleanExcel = pd.read_excel(currentDash, sheet_name="Data", index_col=None)
+    # Drop the first column by position instead of renaming it through
+    # ``columns.values``, which edits the column index in place.
+    cleanExcel = cleanExcel.iloc[:, 1:]
+    print(cleanExcel)
+    return cleanExcel
+
+
+# Run the workflow only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    copytemplate(rootdir, basefile)
+
+# TODO: The dataset is too large. Move the math from the Excel workbook into this script so that the workbook references the final, much smaller dataset. The math, as far as I can tell, is described below.
+"""
+Math:
+    Courses:
+        Enrolled: For each unique course name, count number of "Enrolled" (Col. C) fields if != null.
+        Started: For each unique course name, count number of "Attempt Start" (Col. D) fields if != null.
+        Completed: For each unique course name, count number of "Progress" (Col. G) fields if == 100%
+    Activity Completions:
+        Started first activity == Started number above in Course
+        Activity >= 1 Completions:
+"""
--- a/Scripts/Auto_Update_Excel/walmart_numeric.py
+++ b/Scripts/Auto_Update_Excel/walmart_numeric.py
@ -0,0 +1,70 @@
+import os
+import sys
+from datetime import date
+import glob
+import shutil
+import csv
+import pandas as pd
+
+# Folder that holds the template workbook and receives the dated dashboard copy.
+rootdir = "/Users/normrasmussen/Documents/Resources/Walmart/"
+# Folder that is searched for exported ".xlsx" files.
+downloadir = "/Users/normrasmussen/Google Drive/My Drive/Shared with Clients/Walmart/"
+# File name of the template workbook; it is appended to rootdir.
+basefile = "WalmartTemplate.xlsx"
+
+def copytemplate(rootdir, basefile):
+    """Copy the template to today's dashboard workbook and load the newest export.
+
+    The template ``rootdir/basefile`` is copied to
+    ``Walmart-<MM.DD.YYYY>.xlsx`` inside ``rootdir``; the path of the copy is
+    printed and handed to ``findlatestExport``.
+
+    Args:
+        rootdir: Directory that holds the template and receives the copy.
+            Nothing is done if it does not exist.
+        basefile: File name of the template workbook inside ``rootdir``.
+    """
+    if not os.path.exists(rootdir):
+        return
+    today = date.today().strftime("%m.%d.%Y")
+    # os.path.join keeps the paths valid with or without a trailing slash.
+    currentDash = os.path.join(rootdir, f"Walmart-{today}.xlsx")
+    shutil.copy2(os.path.join(rootdir, basefile), currentDash)
+    print(currentDash)
+    findlatestExport(currentDash)
+
+def findlatestExport(currentDash):
+    """Pick the newest ``.xlsx`` export in ``downloadir`` and load it.
+
+    "Newest" is the file with the largest ``os.path.getctime`` value. When the
+    folder holds no ``.xlsx`` file a message is printed and nothing is loaded.
+
+    Args:
+        currentDash: Path of the dashboard workbook, passed on to
+            ``copytoDash``.
+    """
+    listfiles = glob.glob(os.path.join(downloadir, "*.xlsx"))
+    if not listfiles:
+        # max() raises a bare ValueError on an empty list; report it instead.
+        print(f"No .xlsx exports found in {downloadir}")
+        return
+    latestdownload = max(listfiles, key=os.path.getctime)
+    print(latestdownload)
+    copytoDash(latestdownload, currentDash)
+
+def copytoDash(latestdownload, currentDash):
+    """Read an Excel export and pass it on for formatting and writing.
+
+    Args:
+        latestdownload: Path of the exported workbook to read.
+        currentDash: Path of the dashboard workbook to update.
+    """
+    readExport = pd.read_excel(
+        latestdownload,
+        index_col=False,
+        header=0,
+    )
+    # Discard every column whose label matches "Unname".
+    readExport = readExport.drop(columns=readExport.filter(regex="Unname").columns)
+    # Continue the pipeline; without this call the export is read and dropped.
+    progressFormat(latestdownload, currentDash, readExport)
+
+def progressFormat(latestdownload, currentDash, readExport):
+    """Add a numeric ``Progress_replace`` column and write the frame out.
+
+    Args:
+        latestdownload: Path of the export; passed through to ``bringtoExcel``.
+        currentDash: Path of the dashboard workbook to update.
+        readExport: DataFrame with a ``Progress`` column; the new column is
+            added to this frame in place.
+
+    Raises:
+        KeyError: If ``readExport`` has no ``Progress`` column.
+        ValueError: If a ``Progress`` value is not numeric once ``%`` has been
+            removed.
+    """
+    progress = readExport["Progress"]
+    if not pd.api.types.is_numeric_dtype(progress):
+        # Text values such as "85%" lose the percent sign before conversion;
+        # a column that is already numeric has no .str accessor to call.
+        progress = progress.str.replace("%", "", regex=False)
+    readExport["Progress_replace"] = pd.to_numeric(progress)
+    bringtoExcel(latestdownload, currentDash, readExport)
+
+def bringtoExcel(latestdownload, currentDash, copiedData):
+    """Write ``copiedData`` into the ``Data`` sheet of an existing workbook.
+
+    The workbook is opened in append mode with the openpyxl engine and
+    ``if_sheet_exists="overlay"``.
+
+    Args:
+        latestdownload: Not used here; kept so existing callers keep working.
+        currentDash: Path of the existing ``.xlsx`` workbook to update.
+        copiedData: DataFrame to write; its index is not written.
+    """
+    with pd.ExcelWriter(
+            currentDash,
+            mode="a",
+            engine="openpyxl",
+            if_sheet_exists="overlay",
+    ) as writer:
+        # The writer above already fixes the engine, so to_excel must not
+        # name a different one.
+        copiedData.to_excel(writer, sheet_name="Data", index=False)
+
+#def cleanitUp(latestdownload, currentDash, copiedData):
+    #cleanExcel = pd.read_excel(currentDash, sheet_name="Data", index_col=None)
+#    print(cleanExcel)
+
+# Run the workflow only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    copytemplate(rootdir, basefile)