"""Build dated Walmart dashboard workbooks from the weekly Looker CSV exports.

For every segment the base workbook is copied to a dated file, and each
``WeeklyMCA*.csv`` export whose filename mentions that segment is written
into the copy's "Data" sheet.
"""
import os
from datetime import date
import glob
import re
import shutil

import pandas as pd

rootdir = "/Users/normrasmussen/Documents/Resources/Walmart/"
downloadir = (
    "/Users/normrasmussen/Google Drive/My Drive/Shared with Clients/Walmart_Looker/"
)
basefile = "Walmart_Weekly_Base.xlsx"

# Segment names; each one appears in both the export filename and the
# dashboard filename, which is how the two are paired up.
SEGMENTS = ("Group1", "Group2", "Group3", "Group4", "Group5")


def copytemplate(rootdir, basefile, downloads_dir=None, segments=SEGMENTS):
    """Create today's dashboard for every segment and load its export(s).

    Args:
        rootdir: Directory holding the template; dashboards are created here.
        basefile: File name of the template workbook inside ``rootdir``.
        downloads_dir: Directory searched for ``WeeklyMCA*.csv`` exports.
            Defaults to the module-level ``downloadir``.
        segments: Segment names to build dashboards for.

    Returns:
        None. Prints a message when ``rootdir`` is missing, and one message
        per segment that has no matching export.
    """
    if downloads_dir is None:
        downloads_dir = downloadir

    if not os.path.exists(rootdir):
        # Previously this case was skipped silently; say why nothing happened.
        print(f"Root directory not found: {rootdir}")
        return

    today = date.today().strftime("%m.%d.%Y")
    template = os.path.join(rootdir, basefile)
    # Sorted so the processing order (and therefore which export is written
    # last when several match one segment) is deterministic.
    downloads = sorted(glob.glob(os.path.join(downloads_dir, "WeeklyMCA*.csv")))

    for segment in segments:
        current_dash = os.path.join(rootdir, f"Walmart-{segment}-{today}.xlsx")
        shutil.copy2(template, current_dash)

        # Match on the file name only, so a directory that happens to
        # contain a segment name cannot produce a false match.
        matches = [
            path for path in downloads if segment in os.path.basename(path)
        ]
        if not matches:
            print("No matching Looker Pulls or Templates Found!")
            continue

        # NOTE(review): every matching export is written to the same sheet,
        # so a later one overlays an earlier one - confirm that only one
        # export per segment is expected.
        for latest_download in matches:
            copytoDash(latest_download, current_dash)


def _percent_to_number(values):
    """Turn strings such as "45%" into numbers; numeric data is returned as is."""
    if pd.api.types.is_numeric_dtype(values):
        return values
    # Strip only a real trailing "%" instead of blindly dropping the last
    # character, which corrupted values that had no percent sign.
    cleaned = values.astype(str).str.strip().str.rstrip("%").str.strip()
    return pd.to_numeric(cleaned)


def copytoDash(latestdownload, currentDash):
    """Read one CSV export, clean it and write it into the dashboard.

    Args:
        latestdownload: Path of the CSV export to read.
        currentDash: Path of the dashboard workbook to write into.

    Raises:
        KeyError: If the export has no "Progress" column.
        ValueError: If a "Progress" value is not numeric after removing "%".
    """
    export = pd.read_csv(
        latestdownload,
        index_col=False,
        header=0,
        low_memory=False,
    )
    print(export)

    export["Progress"] = _percent_to_number(export["Progress"])
    print(export["Progress"])

    # Remove the "Unnamed: N" columns pandas creates for header-less columns.
    export = export.drop(columns=export.filter(regex="Unname").columns)
    bringtoExcel(latestdownload, currentDash, export)


def bringtoExcel(latestdownload, currentDash, copiedData):
    """Write ``copiedData`` into the "Data" sheet of an existing workbook.

    Args:
        latestdownload: Path of the source export (unused; kept so existing
            callers keep working).
        currentDash: Path of the workbook, opened in append mode.
        copiedData: DataFrame to write, without its index.
    """
    with pd.ExcelWriter(
        currentDash,
        mode="a",
        engine="openpyxl",
        if_sheet_exists="overlay",
    ) as writer:
        # The writer already fixes the engine; the former engine="xlsxwriter"
        # argument contradicted it and xlsxwriter cannot append anyway.
        copiedData.to_excel(
            writer,
            sheet_name="Data",
            index=False,
        )


def cleanitUp(currentDash):
    """Read the "Data" sheet, drop its first column and print the result.

    Args:
        currentDash: Path of the dashboard workbook to read.

    Returns:
        The DataFrame that was printed.
    """
    sheet = pd.read_excel(currentDash, sheet_name="Data", index_col=None)
    # Drop by position instead of renaming the column through
    # ``columns.values``, which mutated the index's backing array in place.
    sheet = sheet.iloc[:, 1:]
    print(sheet)
    return sheet


if __name__ == "__main__":
    copytemplate(rootdir, basefile)

# TODO: Dataset is too large. Add the math from the excel into the script so
# that the dataset references the final data and a much smaller set.
# Commented below is the math as far as I can tell.
"""
Math:
    Courses:
        Enrolled: For each unique course name, count number of "Enrolled" (Col. C) fields if != null.
        Started: For each unique course name, count number of "Attempt Start" (Col. D) fields if != null.
        Completed: For each unique course name, count number of "Progress" (Col. G) fields if == 100%
    Activity Completions:
        Started first activity == Started number above in Course
        Activity >= 1 Completions:
"""