Started the reorganization of Scripts for GitHub. Some changes to the notes.
This commit is contained in:
31
Scripts/Auto_Update_Excel/requirements.txt
Normal file
31
Scripts/Auto_Update_Excel/requirements.txt
Normal file
@ -0,0 +1,31 @@
|
||||
asgineer==0.8.1
|
||||
bcrypt==4.0.0
|
||||
certifi==2022.6.15.1
|
||||
charset-normalizer==2.1.1
|
||||
click==8.1.3
|
||||
greenlet==1.1.3
|
||||
h11==0.13.0
|
||||
idna==3.3
|
||||
itemdb==1.1.1
|
||||
Jinja2==3.1.2
|
||||
Levenshtein==0.20.7
|
||||
Markdown==3.4.1
|
||||
MarkupSafe==2.1.1
|
||||
msgpack==1.0.4
|
||||
numpy==1.23.4
|
||||
pandas==1.5.1
|
||||
pscript==0.7.7
|
||||
PyJWT==2.4.0
|
||||
pynvim==0.4.3
|
||||
python-dateutil==2.8.2
|
||||
python-Levenshtein==0.20.7
|
||||
pytz==2022.5
|
||||
rapidfuzz==2.12.0
|
||||
requests==2.28.1
|
||||
six==1.16.0
|
||||
timetagger==22.9.1
|
||||
timetagger-cli==21.5.1
|
||||
toml==0.10.2
|
||||
urllib3==1.26.12
|
||||
uvicorn==0.18.3
|
||||
XlsxWriter==3.0.3
|
||||
96
Scripts/Auto_Update_Excel/walmart.py
Normal file
96
Scripts/Auto_Update_Excel/walmart.py
Normal file
@ -0,0 +1,96 @@
|
||||
import os
|
||||
from datetime import date
|
||||
import glob
|
||||
import re
|
||||
import shutil
|
||||
import pandas as pd
|
||||
|
||||
# Folder that holds the workbook template and receives the dated dashboards.
rootdir = "/Users/normrasmussen/Documents/Resources/Walmart/"
# Folder that is searched for the downloaded "WeeklyMCA*.csv" exports.
downloadir = (
    "/Users/normrasmussen/Google Drive/My Drive/"
    "Shared with Clients/Walmart_Looker/"
)
# File name of the workbook template inside ``rootdir``.
basefile = "Walmart_Weekly_Base.xlsx"
|
||||
|
||||
|
||||
def copytemplate(
    rootdir,
    basefile,
    segments=("Group1", "Group2", "Group3", "Group4", "Group5"),
):
    """Create today's dashboard workbook for every segment and fill it.

    For each segment the template ``rootdir + basefile`` is copied to
    ``rootdir + "Walmart-<segment>-<MM.DD.YYYY>.xlsx"``.  Every
    ``WeeklyMCA*.csv`` file in the module-level ``downloadir`` whose file
    name contains the segment is then passed to ``copytoDash`` together with
    the path of the new workbook.

    Args:
        rootdir: Directory path, including the trailing separator, that
            holds the template and receives the dated copies.
        basefile: File name of the template workbook inside ``rootdir``.
        segments: Segment names to process; a segment is matched against the
            file names of the downloaded exports.

    Returns:
        None.
    """
    if not os.path.exists(rootdir):
        # Nothing can be copied without the root folder; report it instead
        # of finishing silently.
        print(f"Root directory not found: {rootdir}")
        return

    today = date.today().strftime("%m.%d.%Y")
    template = rootdir + basefile
    listfiles = glob.glob(downloadir + "WeeklyMCA*.csv")

    for segment in segments:
        currentDash = rootdir + f"Walmart-{segment}-{today}.xlsx"
        shutil.copy2(template, currentDash)

        # Match on the file name only, so a segment name that happens to
        # occur in a parent folder cannot produce a false match.
        matches = [
            latestdownload
            for latestdownload in listfiles
            if segment in os.path.basename(latestdownload)
        ]
        if not matches:
            # Reported once per segment rather than once per unrelated file.
            print("No matching Looker Pulls or Templates Found!")
            continue

        for latestdownload in matches:
            copytoDash(latestdownload, currentDash)
|
||||
|
||||
|
||||
def copytoDash(latestdownload, currentDash):
    """Load one CSV export, clean it and hand it on for writing.

    The ``Progress`` column is converted from percentage text (for example
    ``"85%"``) to numbers, columns whose name matches ``Unname`` are dropped,
    and a copy of the result is passed to ``bringtoExcel``.  The raw frame
    and the converted column are printed along the way.

    Args:
        latestdownload: Path of the CSV export to read.
        currentDash: Path of the dashboard workbook that receives the data.

    Returns:
        None.
    """
    readExport = pd.read_csv(
        latestdownload,
        index_col=False,
        header=0,
        low_memory=False,
    )
    print(readExport)

    progress = readExport["Progress"]
    if not pd.api.types.is_numeric_dtype(progress):
        # Strip only a trailing "%" (and surrounding blanks).  Cutting off
        # the last character unconditionally would turn "100" into 10.
        progress = pd.to_numeric(progress.str.strip().str.rstrip("%"))
    # A column that was parsed as numbers already is kept unchanged; the
    # ``.str`` accessor would raise on it.
    readExport["Progress"] = progress
    print(readExport["Progress"])

    readExport.drop(readExport.filter(regex="Unname"), axis=1, inplace=True)
    copiedData = readExport.copy()
    bringtoExcel(latestdownload, currentDash, copiedData)
|
||||
|
||||
|
||||
def bringtoExcel(latestdownload, currentDash, copiedData):
    """Write ``copiedData`` into the ``Data`` sheet of an existing workbook.

    The workbook is opened in append mode with the openpyxl engine and
    ``if_sheet_exists="overlay"``; the frame is written without its index.

    Args:
        latestdownload: Path of the export the data came from.  Not used
            here; kept so existing callers keep working.
        currentDash: Path of the workbook to update.
        copiedData: DataFrame to write.

    Returns:
        None.
    """
    with pd.ExcelWriter(
        currentDash,
        mode="a",
        engine="openpyxl",
        if_sheet_exists="overlay",
    ) as writer:
        # The engine is determined by the writer opened above.  Passing a
        # different ``engine`` to ``to_excel`` has no effect when a writer
        # object is supplied, so the conflicting argument was removed.
        copiedData.to_excel(writer, sheet_name="Data", index=False)
|
||||
|
||||
|
||||
def cleanitUp(currentDash):
    """Print the ``Data`` sheet of a workbook without its first column.

    Args:
        currentDash: Path of the workbook to read.

    Returns:
        None.
    """
    cleanExcel = pd.read_excel(currentDash, sheet_name="Data", index_col=None)
    # Drop the first column by position.  Renaming it through
    # ``columns.values[0]`` writes into an Index, which pandas treats as
    # immutable, and fails outright on a sheet without columns.
    cleanExcel = cleanExcel.iloc[:, 1:]
    print(cleanExcel)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Executed as a script: build and fill today's dashboards.
    copytemplate(rootdir=rootdir, basefile=basefile)
|
||||
|
||||
# TODO: The dataset is too large. Move the math from the Excel workbook into this script so that the workbook references only the final data, which is a much smaller set. The math, as far as I can tell, is written out below.
|
||||
"""
|
||||
Math:
|
||||
Courses:
|
||||
Enrolled: For each unique course name, count number of "Enrolled" (Col. C) fields if != null.
|
||||
Started: For each unique course name, count number of "Attempt Start" (Col. D) fields if != null.
|
||||
Completed: For each unique course name, count number of "Progress" (Col. G) fields if == 100%
|
||||
Activity Completions:
|
||||
Started first activity == Started number above in Course
|
||||
Activity >= 1 Completions:
|
||||
"""
|
||||
70
Scripts/Auto_Update_Excel/walmart_numeric.py
Normal file
70
Scripts/Auto_Update_Excel/walmart_numeric.py
Normal file
@ -0,0 +1,70 @@
|
||||
import os
|
||||
import sys
|
||||
from datetime import date
|
||||
import glob
|
||||
import shutil
|
||||
import csv
|
||||
import pandas as pd
|
||||
|
||||
# Folder that holds the workbook template and receives the dated dashboard.
rootdir = "/Users/normrasmussen/Documents/Resources/Walmart/"
# Folder that is searched for the downloaded "*.xlsx" exports.
downloadir = (
    "/Users/normrasmussen/Google Drive/My Drive/"
    "Shared with Clients/Walmart/"
)
# File name of the workbook template inside ``rootdir``.
basefile = "WalmartTemplate.xlsx"
|
||||
|
||||
def copytemplate(rootdir, basefile):
    """Copy the template to a workbook named after today's date and fill it.

    When ``rootdir`` exists, ``rootdir + basefile`` is copied to
    ``rootdir + "Walmart-<MM.DD.YYYY>.xlsx"``, the new path is printed and
    passed to ``findlatestExport``.  When ``rootdir`` does not exist nothing
    happens.

    Args:
        rootdir: Directory path, including the trailing separator, that
            holds the template and receives the dated copy.
        basefile: File name of the template workbook inside ``rootdir``.

    Returns:
        None.
    """
    if not os.path.exists(rootdir):
        return

    stamp = date.today().strftime("%m.%d.%Y")
    dashboard_path = rootdir + f"Walmart-{stamp}.xlsx"
    shutil.copy2(rootdir + basefile, dashboard_path)
    print(dashboard_path)
    findlatestExport(dashboard_path)
|
||||
|
||||
def findlatestExport(currentDash):
    """Pick the newest ``.xlsx`` export and load it into the dashboard.

    The files matching ``downloadir + "*.xlsx"`` are compared by
    ``os.path.getctime``; the one with the greatest value is printed and
    passed to ``copytoDash`` together with ``currentDash``.

    Args:
        currentDash: Path of the dashboard workbook that receives the data.

    Returns:
        None.

    Raises:
        ValueError: If the download folder contains no ``.xlsx`` file.
    """
    listfiles = glob.glob(downloadir + "*.xlsx")
    if not listfiles:
        # Same exception type ``max`` would raise on an empty list, but with
        # a message that names the actual problem.
        raise ValueError(f"No .xlsx exports found in {downloadir}")

    latestdownload = max(listfiles, key=os.path.getctime)
    print(latestdownload)
    copytoDash(latestdownload, currentDash)
|
||||
|
||||
def copytoDash(latestdownload, currentDash):
    """Read an export workbook, clean it and pass it on for formatting.

    Columns whose name matches ``Unname`` are dropped, then the frame is
    handed to ``progressFormat``.

    Args:
        latestdownload: Path of the export workbook to read.
        currentDash: Path of the dashboard workbook that receives the data.

    Returns:
        None.
    """
    readExport = pd.read_excel(latestdownload, index_col=False, header=0)
    readExport.drop(readExport.filter(regex="Unname"), axis=1, inplace=True)
    # The cleaned frame used to be discarded at this point, so no data ever
    # reached the dashboard; continue with the next stage instead.
    progressFormat(latestdownload, currentDash, readExport)


def progressFormat(latestdownload, currentDash, readExport):
    """Add a numeric ``Progress_replace`` column and write the data out.

    ``Progress_replace`` holds the ``Progress`` values with any ``%`` removed
    and converted to numbers.  The frame is then passed to ``bringtoExcel``.

    Args:
        latestdownload: Path of the export the data came from.
        currentDash: Path of the dashboard workbook that receives the data.
        readExport: DataFrame with a ``Progress`` column; modified in place.

    Returns:
        None.
    """
    progress = readExport["Progress"]
    if not pd.api.types.is_numeric_dtype(progress):
        progress = pd.to_numeric(progress.str.replace("%", "", regex=False))
    # A column that was read as numbers already is copied unchanged; the
    # ``.str`` accessor would raise on it.
    readExport["Progress_replace"] = progress

    # The frame built here is what has to be written; the previously used
    # name ``copiedData`` was never defined in this function.
    bringtoExcel(latestdownload, currentDash, readExport)
    # TODO: call cleanitUp here once that function is re-enabled.
|
||||
|
||||
def bringtoExcel(latestdownload, currentDash, copiedData):
    """Write ``copiedData`` into the ``Data`` sheet of an existing workbook.

    The workbook is opened in append mode with the openpyxl engine and
    ``if_sheet_exists="overlay"``; the frame is written without its index.

    Args:
        latestdownload: Path of the export the data came from.  Not used
            here; kept so existing callers keep working.
        currentDash: Path of the workbook to update.
        copiedData: DataFrame to write.

    Returns:
        None.
    """
    with pd.ExcelWriter(
        currentDash,
        mode="a",
        engine="openpyxl",
        if_sheet_exists="overlay",
    ) as writer:
        # The engine is determined by the writer opened above.  Passing a
        # different ``engine`` to ``to_excel`` has no effect when a writer
        # object is supplied, so the conflicting argument was removed.
        copiedData.to_excel(writer, sheet_name="Data", index=False)
|
||||
|
||||
#def cleanitUp(latestdownload, currentDash, copiedData):
|
||||
#cleanExcel = pd.read_excel(currentDash, sheet_name="Data", index_col=None)
|
||||
# print(cleanExcel)
|
||||
|
||||
if __name__ == "__main__":
    # Executed as a script: build and fill today's dashboard.
    copytemplate(rootdir=rootdir, basefile=basefile)
|
||||
Reference in New Issue
Block a user