Started reorganizing the scripts for GitHub. Some changes to the notes.

This commit is contained in:
Norm Rasmussen
2023-05-04 16:07:06 -04:00
parent 6a674ca24b
commit 18a266746b
209 changed files with 121 additions and 12181 deletions

View File

@ -0,0 +1,31 @@
asgineer==0.8.1
bcrypt==4.0.0
certifi==2022.6.15.1
charset-normalizer==2.1.1
click==8.1.3
greenlet==1.1.3
h11==0.13.0
idna==3.3
itemdb==1.1.1
Jinja2==3.1.2
Levenshtein==0.20.7
Markdown==3.4.1
MarkupSafe==2.1.1
msgpack==1.0.4
numpy==1.23.4
pandas==1.5.1
pscript==0.7.7
PyJWT==2.4.0
pynvim==0.4.3
python-dateutil==2.8.2
python-Levenshtein==0.20.7
pytz==2022.5
rapidfuzz==2.12.0
requests==2.28.1
six==1.16.0
timetagger==22.9.1
timetagger-cli==21.5.1
toml==0.10.2
urllib3==1.26.12
uvicorn==0.18.3
XlsxWriter==3.0.3

View File

@ -0,0 +1,96 @@
import os
from datetime import date
import glob
import re
import shutil
import pandas as pd
# Folder that holds the workbook template and receives the dated copies.
rootdir = "/Users/normrasmussen/Documents/Resources/Walmart/"

# Folder that is searched for the downloaded "WeeklyMCA*.csv" exports.
downloadir = "/Users/normrasmussen/Google Drive/My Drive/Shared with Clients/Walmart_Looker/"

# File name of the workbook template inside ``rootdir``.
basefile = "Walmart_Weekly_Base.xlsx"
def copytemplate(rootdir, basefile):
    """Create today's workbook for every segment and load its exports.

    For each segment the template is copied to
    ``Walmart-<segment>-<MM.DD.YYYY>.xlsx`` inside ``rootdir``. Every
    ``WeeklyMCA*.csv`` file found in the module-level ``downloadir`` whose
    path contains the segment name is then passed to ``copytoDash`` together
    with that copy.

    Args:
        rootdir: Directory containing the template; the dated copies are
            written here as well.
        basefile: File name of the template inside ``rootdir``.
    """
    segments = ("Group1", "Group2", "Group3", "Group4", "Group5")
    stamp = date.today().strftime("%m.%d.%Y")
    template = os.path.join(rootdir, basefile)
    downloads = glob.glob(os.path.join(downloadir, "WeeklyMCA*.csv"))

    if not os.path.exists(rootdir):
        print("No matching Looker Pulls or Templates Found!")
        return

    for segment in segments:
        dashboard = os.path.join(rootdir, f"Walmart-{segment}-{stamp}.xlsx")
        shutil.copy2(template, dashboard)

        # The dashboard name always contains its own segment, so only the
        # download path has to be checked against the segment name.
        matches = [path for path in downloads if segment in path]
        if not matches:
            # Report once per segment rather than once per unrelated file.
            print("No matching Looker Pulls or Templates Found!")
            continue

        for path in matches:
            copytoDash(path, dashboard)
def copytoDash(latestdownload, currentDash):
    """Read a CSV export, clean it, and pass it on to ``bringtoExcel``.

    The ``Progress`` column is converted to numbers and every column whose
    name matches ``Unname`` is dropped before the data is handed over.

    Args:
        latestdownload: Path of the CSV export to read.
        currentDash: Path of the workbook, forwarded to ``bringtoExcel``.

    Raises:
        KeyError: If the export has no ``Progress`` column.
        ValueError: If a ``Progress`` value is not numeric after the
            trailing ``%`` has been removed.
    """
    export = pd.read_csv(
        latestdownload,
        index_col=False,
        header=0,
        low_memory=False,
    )
    print(export)

    progress = export["Progress"]
    # Strip only a trailing "%" instead of blindly cutting the last character,
    # and leave a column that was already parsed as numbers untouched.
    if not pd.api.types.is_numeric_dtype(progress):
        progress = pd.to_numeric(progress.str.rstrip("%"))
    export["Progress"] = progress
    print(export["Progress"])

    unnamed = export.filter(regex="Unname").columns
    cleaned = export.drop(columns=unnamed)
    bringtoExcel(latestdownload, currentDash, cleaned)
def bringtoExcel(latestdownload, currentDash, copiedData):
    """Write ``copiedData`` onto the ``Data`` sheet of an existing workbook.

    The workbook is opened in append mode with the openpyxl engine and the
    sheet is overlaid rather than replaced.

    Args:
        latestdownload: Not used here; kept so existing callers keep working.
        currentDash: Path of the workbook to write into.
        copiedData: DataFrame to write; its index is not written.
    """
    with pd.ExcelWriter(
        currentDash,
        mode="a",
        engine="openpyxl",
        if_sheet_exists="overlay",
    ) as writer:
        # The engine is fixed by the writer object above. Passing a second,
        # different engine to ``to_excel`` has no effect and only misleads.
        copiedData.to_excel(writer, sheet_name="Data", index=False)
def cleanitUp(currentDash):
    """Print the ``Data`` sheet of a workbook without its first column.

    Args:
        currentDash: Path of the workbook to read.
    """
    sheet = pd.read_excel(currentDash, sheet_name="Data", index_col=None)
    # Drop the first column by position. Renaming it through
    # ``columns.values`` mutates the Index's backing array in place, which
    # pandas does not guarantee to pick up on later label lookups.
    trimmed = sheet.iloc[:, 1:]
    print(trimmed)
if __name__ == "__main__":
    # Script entry point: run the pipeline with the module-level defaults.
    copytemplate(rootdir=rootdir, basefile=basefile)
# TODO: The dataset is too large. Move the math from the Excel workbook into this script so that the workbook references only the final, much smaller data set. The math, as far as I can tell, is described below.
"""
Math:
Courses:
Enrolled: For each unique course name, count number of "Enrolled" (Col. C) fields if != null.
Started: For each unique course name, count number of "Attempt Start" (Col. D) fields if != null.
Completed: For each unique course name, count number of "Progress" (Col. G) fields if == 100%
Activity Completions:
Started first activity == Started number above in Course
Activity >= 1 Completions:
"""

View File

@ -0,0 +1,70 @@
import os
import sys
from datetime import date
import glob
import shutil
import csv
import pandas as pd
# Folder that holds the workbook template and receives the dated copy.
rootdir = "/Users/normrasmussen/Documents/Resources/Walmart/"

# Folder that is searched for downloaded ``*.xlsx`` exports.
downloadir = (
    "/Users/normrasmussen/Google Drive/My Drive/Shared with Clients/Walmart/"
)

# File name of the workbook template inside ``rootdir``.
basefile = "WalmartTemplate.xlsx"
def copytemplate(rootdir, basefile):
    """Copy the template to today's workbook and start the export import.

    The template is copied to ``Walmart-<MM.DD.YYYY>.xlsx`` inside
    ``rootdir``; the path of that copy is printed and passed to
    ``findlatestExport``.

    Args:
        rootdir: Directory containing the template; the dated copy is
            written here as well.
        basefile: File name of the template inside ``rootdir``.
    """
    if not os.path.exists(rootdir):
        # Say why nothing happened instead of returning silently.
        print(f"Directory not found: {rootdir}")
        return

    stamp = date.today().strftime("%m.%d.%Y")
    template = os.path.join(rootdir, basefile)
    # Build the destination once so the copy target and the path handed on
    # can never drift apart.
    currentDash = os.path.join(rootdir, f"Walmart-{stamp}.xlsx")

    shutil.copy2(template, currentDash)
    print(currentDash)
    findlatestExport(currentDash)
def findlatestExport(currentDash):
    """Pick the newest ``*.xlsx`` export and pass it to ``copytoDash``.

    The files in the module-level ``downloadir`` are ranked by
    ``os.path.getctime``; the path with the largest value is printed and
    forwarded together with ``currentDash``.

    Args:
        currentDash: Path of the workbook, forwarded to ``copytoDash``.

    Raises:
        ValueError: If ``downloadir`` contains no ``*.xlsx`` file.
    """
    candidates = glob.glob(os.path.join(downloadir, "*.xlsx"))
    if not candidates:
        # Same exception type ``max`` would raise on an empty list, but with
        # a message that names the actual problem.
        raise ValueError(f"No .xlsx exports found in {downloadir}")

    newest = max(candidates, key=os.path.getctime)
    print(newest)
    copytoDash(newest, currentDash)
def copytoDash(latestdownload, currentDash):
    """Read an Excel export, drop its unnamed columns, and process it.

    The cleaned data is handed to ``progressFormat``, which in turn writes
    it to the workbook.

    Args:
        latestdownload: Path of the Excel export to read.
        currentDash: Path of the workbook, forwarded to ``progressFormat``.
    """
    readExport = pd.read_excel(
        latestdownload,
        index_col=False,
        header=0,
    )
    unnamed = readExport.filter(regex="Unname").columns
    readExport = readExport.drop(columns=unnamed)
    # Pass the data on; previously the cleaned frame was simply discarded.
    progressFormat(latestdownload, currentDash, readExport)


def progressFormat(latestdownload, currentDash, readExport):
    """Add a numeric ``Progress_replace`` column and write the data out.

    ``Progress_replace`` holds the ``Progress`` values with every ``%``
    removed and converted to numbers. The frame is modified in place and
    then passed to ``bringtoExcel``.

    Args:
        latestdownload: Path of the export, forwarded to ``bringtoExcel``.
        currentDash: Path of the workbook, forwarded to ``bringtoExcel``.
        readExport: DataFrame containing a ``Progress`` column.

    Raises:
        KeyError: If ``readExport`` has no ``Progress`` column.
        ValueError: If a ``Progress`` value is not numeric once ``%`` has
            been removed.
    """
    progress = readExport["Progress"]
    # The ``.str`` accessor only exists on text columns; a column that was
    # already read as numbers needs no conversion.
    if not pd.api.types.is_numeric_dtype(progress):
        progress = pd.to_numeric(progress.str.replace("%", "", regex=False))
    readExport["Progress_replace"] = progress
    bringtoExcel(latestdownload, currentDash, readExport)
def bringtoExcel(latestdownload, currentDash, copiedData):
    """Write ``copiedData`` onto the ``Data`` sheet of an existing workbook.

    The workbook is opened in append mode with the openpyxl engine and the
    sheet is overlaid rather than replaced.

    Args:
        latestdownload: Not used here; kept so existing callers keep working.
        currentDash: Path of the workbook to write into.
        copiedData: DataFrame to write; its index is not written.
    """
    with pd.ExcelWriter(
        currentDash,
        mode="a",
        engine="openpyxl",
        if_sheet_exists="overlay",
    ) as writer:
        # The engine is fixed by the writer object above. Passing a second,
        # different engine to ``to_excel`` has no effect and only misleads.
        copiedData.to_excel(writer, sheet_name="Data", index=False)
#def cleanitUp(latestdownload, currentDash, copiedData):
#cleanExcel = pd.read_excel(currentDash, sheet_name="Data", index_col=None)
# print(cleanExcel)
if __name__ == "__main__":
    # Script entry point: run the pipeline with the module-level defaults.
    copytemplate(rootdir=rootdir, basefile=basefile)