97 lines
3.7 KiB
Python
97 lines
3.7 KiB
Python
import os
|
|
from datetime import date
|
|
import glob
|
|
import re
|
|
import shutil
|
|
import pandas as pd
|
|
|
|
# Directory that holds the template workbook; the dated dashboard copies are
# written here as well.
rootdir = "/Users/normrasmussen/Documents/Resources/Walmart/"

# Directory that is searched for the "WeeklyMCA*.csv" export files.
downloadir = "/Users/normrasmussen/Google Drive/My Drive/Shared with Clients/Walmart_Looker/"

# File name of the template workbook inside ``rootdir``.
basefile = "Walmart_Weekly_Base.xlsx"
|
|
|
|
|
|
def copytemplate(rootdir, basefile):
    """Create today's per-segment dashboards and load the matching exports.

    For every segment the template workbook ``basefile`` in ``rootdir`` is
    copied to ``Walmart-<segment>-<MM.DD.YYYY>.xlsx`` in the same directory.
    Each ``WeeklyMCA*.csv`` file found in the module-level ``downloadir``
    whose file name contains the segment name is then handed to
    ``copytoDash`` together with the path of the new workbook.

    Args:
        rootdir: Directory containing the template; the dated copies are
            written here too.
        basefile: File name of the template workbook inside ``rootdir``.

    Returns:
        None. If ``rootdir`` does not exist a notice is printed and nothing
        is copied.
    """
    segments = ["Group1", "Group2", "Group3", "Group4", "Group5"]
    today = date.today().strftime("%m.%d.%Y")

    # os.path.join yields the same paths as plain concatenation when the
    # directory ends with a separator, and still works when it does not.
    template = os.path.join(rootdir, basefile)
    listfiles = glob.glob(os.path.join(downloadir, "WeeklyMCA*.csv"))

    if not os.path.exists(rootdir):
        print(f"Root directory not found: {rootdir}")
        return

    for segment in segments:
        currentDash = os.path.join(rootdir, f"Walmart-{segment}-{today}.xlsx")
        shutil.copy2(template, currentDash)

        # Match on the file name only, so a directory that happens to contain
        # a segment name cannot make every download match.
        matches = [
            download
            for download in listfiles
            if segment in os.path.basename(download)
        ]

        # Report a missing export once per segment (including the case where
        # no export files were found at all) instead of once per mismatch.
        if not matches:
            print("No matching Looker Pulls or Templates Found!")
            continue

        for latestdownload in matches:
            copytoDash(latestdownload, currentDash)
|
|
|
|
|
|
def copytoDash(latestdownload, currentDash):
    """Load one CSV export, tidy it, and pass it on to ``bringtoExcel``.

    The CSV is read with its first row as the header. The last character of
    every "Progress" value is removed (presumably a trailing "%" sign --
    confirm against a real export) and the remainder is converted to a
    number. Columns whose name matches "Unname" are dropped. Both the full
    frame and the converted "Progress" column are printed along the way.

    Args:
        latestdownload: Path of the CSV export to read.
        currentDash: Path of the workbook that receives the data; it is only
            forwarded to ``bringtoExcel``.
    """
    exportFrame = pd.read_csv(
        latestdownload, header=0, index_col=False, low_memory=False
    )
    print(exportFrame)

    # Strip the final character from each value, then convert element by element.
    trimmedProgress = exportFrame["Progress"].str[:-1]
    exportFrame["Progress"] = trimmedProgress.apply(pd.to_numeric)
    print(exportFrame["Progress"])

    # Iterating a DataFrame yields its column labels.
    unnamedColumns = list(exportFrame.filter(regex="Unname"))
    exportFrame.drop(columns=unnamedColumns, inplace=True)

    bringtoExcel(latestdownload, currentDash, exportFrame.copy())
|
|
|
|
|
|
def bringtoExcel(latestdownload, currentDash, copiedData):
    """Write ``copiedData`` onto the "Data" sheet of an existing workbook.

    The workbook is opened in append mode with the openpyxl engine and
    ``if_sheet_exists="overlay"``, so an existing "Data" sheet is written
    over in place rather than being replaced by a new sheet.

    Args:
        latestdownload: Not used here; kept so existing callers keep working.
        currentDash: Path of the workbook to update. It must already exist
            because the writer is opened with ``mode="a"``.
        copiedData: DataFrame to write; its index is not written.
    """
    with pd.ExcelWriter(
        currentDash,
        mode="a",
        engine="openpyxl",
        if_sheet_exists="overlay",
    ) as writer:
        # No ``engine=`` here: the engine is fixed by the ExcelWriter above.
        # Naming a different one in to_excel has no effect and only misleads.
        copiedData.to_excel(writer, sheet_name="Data", index=False)
|
|
|
|
|
|
def cleanitUp(currentDash):
    """Read the "Data" sheet of a workbook, drop its first column, and print it.

    The result is only printed; it is neither returned nor written back to
    the workbook.

    Args:
        currentDash: Path of the workbook to read.
    """
    cleanExcel = pd.read_excel(currentDash, sheet_name="Data", index_col=None)

    # Drop the first column by position. This avoids mutating the column
    # index in place and cannot remove a genuine column that happens to be
    # named "tmp".
    cleanExcel = cleanExcel.iloc[:, 1:]
    print(cleanExcel)
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: build today's dashboards from the module-level paths.
    copytemplate(rootdir=rootdir, basefile=basefile)
|
|
|
|
# TODO: The dataset is too large. Move the calculations from the Excel workbook into this script so that the dataset holds only the final computed figures and is much smaller. The calculations, as far as I can tell, are described in the note below.
|
|
"""
|
|
Math:
    Courses:
        Enrolled: for each unique course name, count the "Enrolled" (Col. C) fields that are not null.
        Started: for each unique course name, count the "Attempt Start" (Col. D) fields that are not null.
        Completed: for each unique course name, count the "Progress" (Col. G) fields that equal 100%.
    Activity Completions:
        Started first activity: equal to the "Started" count under Courses above.
        Activity >= 1 Completions: (formula not documented yet)
|
|
"""
|