# Gainsight/Scripts/Auto_Update_Excel/walmart.py

import os
from datetime import date
import glob
import re
import shutil
import pandas as pd

# Directory that holds the template workbook and receives the dated copies.
rootdir = "/Users/normrasmussen/Documents/Resources/Walmart/"
# Directory searched for the "WeeklyMCA*.csv" export files.
downloadir = "/Users/normrasmussen/Google Drive/My Drive/Shared with Clients/Walmart_Looker/"
# File name of the template workbook inside rootdir.
basefile = "Walmart_Weekly_Base.xlsx"


def copytemplate(rootdir, basefile):
    """Create today's workbook for each segment and load its CSV exports.

    For every segment the template ``basefile`` (inside ``rootdir``) is copied
    to ``Walmart-<segment>-<MM.DD.YYYY>.xlsx`` in ``rootdir``. Each
    ``WeeklyMCA*.csv`` file found in the module-level ``downloadir`` whose
    file name contains the segment name is then passed to ``copytoDash``
    together with the path of the new workbook.

    Args:
        rootdir: Directory holding the template; the dated copies are
            written here. If it does not exist, nothing is copied.
        basefile: File name of the template workbook inside ``rootdir``.
    """
    segments = ["Group1", "Group2", "Group3", "Group4", "Group5"]
    today = date.today().strftime("%m.%d.%Y")
    # os.path.join works whether or not the directories end with a separator.
    template = os.path.join(rootdir, basefile)
    listfiles = glob.glob(os.path.join(downloadir, "WeeklyMCA*.csv"))

    if not os.path.exists(rootdir):
        print(f"Root directory not found: {rootdir}")
        return

    for segment in segments:
        currentDash = os.path.join(rootdir, f"Walmart-{segment}-{today}.xlsx")
        shutil.copy2(template, currentDash)

        # Match on the file name only, so a group name appearing in a
        # directory name cannot pair an export with the wrong workbook.
        matches = [
            download
            for download in listfiles
            if segment in os.path.basename(download)
        ]
        if not matches:
            # Reported once per segment, and only when nothing matched.
            print("No matching Looker Pulls or Templates Found!")
            continue

        for latestdownload in matches:
            copytoDash(latestdownload, currentDash)


def copytoDash(latestdownload, currentDash):
    """Load one CSV export, normalise it and write it into a workbook.

    The "Progress" column is converted from percentage text (for example
    ``"85%"``) to numbers, columns whose name contains "Unname" are dropped,
    and the result is handed to ``bringtoExcel``.

    Args:
        latestdownload: Path of the CSV export to read.
        currentDash: Path of the workbook that receives the data.

    Raises:
        KeyError: If the export has no "Progress" column.
        ValueError: If a "Progress" value is not numeric once "%" is removed.
    """
    readExport = pd.read_csv(
        latestdownload,
        index_col=False,
        header=0,
        low_memory=False,
    )
    print(readExport)

    progress = readExport["Progress"]
    if not pd.api.types.is_numeric_dtype(progress):
        # Remove only a trailing "%" instead of blindly chopping the last
        # character, so values that carry no "%" keep their final digit.
        progress = pd.to_numeric(progress.str.strip().str.rstrip("%"))
    # A column that pandas already parsed as numeric is left untouched.
    readExport["Progress"] = progress
    print(readExport["Progress"])

    # Drop the columns whose name contains "Unname", passing explicit labels.
    unnamed = readExport.filter(regex="Unname").columns
    readExport = readExport.drop(columns=unnamed)

    bringtoExcel(latestdownload, currentDash, readExport)


def bringtoExcel(latestdownload, currentDash, copiedData):
    """Write ``copiedData`` onto the "Data" sheet of an existing workbook.

    The workbook is opened in append mode with the openpyxl engine and the
    frame is written over the sheet's current cells
    (``if_sheet_exists="overlay"``), without the DataFrame index.

    Args:
        latestdownload: Unused here; kept so existing callers keep working.
        currentDash: Path of the .xlsx workbook to update.
        copiedData: DataFrame to write.
    """
    with pd.ExcelWriter(
        currentDash,
        mode="a",
        engine="openpyxl",
        if_sheet_exists="overlay",
    ) as writer:
        # The engine is fixed by the writer above; to_excel ignores its own
        # `engine` argument when given an ExcelWriter, so none is passed.
        copiedData.to_excel(
            writer,
            sheet_name="Data",
            index=False,
        )


def cleanitUp(currentDash):
    """Read the "Data" sheet of a workbook and discard its first column.

    Args:
        currentDash: Path of the workbook to read.

    Returns:
        The "Data" sheet as a DataFrame without its first column. The frame
        is also printed.
    """
    cleanExcel = pd.read_excel(currentDash, sheet_name="Data", index_col=None)
    # Drop the first column by position rather than renaming it through
    # `columns.values`, which mutates the Index in place and is unsupported.
    cleanExcel = cleanExcel.iloc[:, 1:]
    print(cleanExcel)
    return cleanExcel


if __name__ == "__main__":
    # Run only when executed as a script: build the workbooks from the
    # module-level paths.
    copytemplate(rootdir=rootdir, basefile=basefile)

# TODO: The dataset is too large. Move the math from the Excel workbook into this script so the workbook references the final, much smaller data set. The math, as far as I can tell, is described below.
"""
Math:
    Courses:
        Enrolled: For each unique course name, count number of "Enrolled" (Col. C) fields if != null.
        Started: For each unique course name, count number of "Attempt Start" (Col. D) fields if != null.
        Completed: For each unique course name, count number of "Progress" (Col. G) fields if == 100%
    Activity Completions:
        Started first activity == Started number above in Course
        Activity >= 1 Completions:
"""