Gainsight/Scripts/Auto_Update_Excel/walmart.py

import os
from datetime import date
import glob
import re
import shutil
import pandas as pd

# Directory that holds the template workbook; the dated per-segment copies
# are written here as well.
rootdir = "/Users/normrasmussen/Documents/Resources/Walmart/"
# Directory that is searched for the "WeeklyMCA*.csv" exports.
downloadir = (
    "/Users/normrasmussen/Google Drive/My Drive/Shared with Clients/Walmart_Looker/"
)
# File name of the template workbook inside rootdir.
basefile = "Walmart_Weekly_Base.xlsx"


def copytemplate(rootdir, basefile):
    """Create today's workbook for every segment and load its export into it.

    For each segment a copy of the template ``basefile`` is written to
    ``rootdir`` as ``Walmart-<segment>-<MM.DD.YYYY>.xlsx``.  Every
    ``WeeklyMCA*.csv`` file in the module-level ``downloadir`` whose file
    name contains the segment name is then passed to ``copytoDash`` together
    with the freshly created workbook.

    Args:
        rootdir: Directory that holds the template and receives the copies.
        basefile: File name of the template workbook inside ``rootdir``.

    Raises:
        FileNotFoundError: If the template workbook does not exist
            (raised by ``shutil.copy2``).
    """
    segments = ["Group1", "Group2", "Group3", "Group4", "Group5"]
    today = date.today().strftime("%m.%d.%Y")

    if not os.path.exists(rootdir):
        # Nothing can be copied without the target directory; say so instead
        # of exiting silently.
        print(f"Root directory not found: {rootdir}")
        return

    template = os.path.join(rootdir, basefile)
    listfiles = glob.glob(os.path.join(downloadir, "WeeklyMCA*.csv"))

    for segment in segments:
        currentDash = os.path.join(rootdir, f"Walmart-{segment}-{today}.xlsx")
        shutil.copy2(template, currentDash)

        # Match on the file name only, so a directory that happens to contain
        # a segment name cannot make every export match that segment.
        matches = [
            path for path in listfiles if segment in os.path.basename(path)
        ]
        if not matches:
            # Reported once per segment rather than once per non-matching file.
            print("No matching Looker Pulls or Templates Found!")
            continue

        for latestdownload in matches:
            copytoDash(latestdownload, currentDash)


def copytoDash(latestdownload, currentDash):
    """Load one CSV export, clean it and write it into the workbook.

    The ``Progress`` column is converted from percent text such as ``"85%"``
    to numbers, every column whose name contains ``"Unname"`` is dropped, and
    the resulting frame is handed to ``bringtoExcel``.

    Args:
        latestdownload: Path of the CSV export to read.
        currentDash: Path of the workbook that receives the data.

    Raises:
        KeyError: If the export has no ``Progress`` column.
        ValueError: If a ``Progress`` value is still not numeric after the
            trailing ``%`` has been removed.
    """
    readExport = pd.read_csv(
        latestdownload,
        index_col=False,
        header=0,
        low_memory=False,
    )
    print(readExport)

    progress = readExport["Progress"]
    if not pd.api.types.is_numeric_dtype(progress):
        # Remove only a trailing "%" (not blindly the last character) so that
        # values without a percent sign keep all of their digits.  A column
        # that pandas already parsed as numeric is left untouched.
        progress = pd.to_numeric(progress.str.strip().str.rstrip("%"))
    readExport["Progress"] = progress
    print(readExport["Progress"])

    unnamed_columns = readExport.filter(regex="Unname").columns
    readExport = readExport.drop(columns=unnamed_columns)
    bringtoExcel(latestdownload, currentDash, readExport)


def bringtoExcel(latestdownload, currentDash, copiedData):
    """Write ``copiedData`` into the ``Data`` sheet of an existing workbook.

    The workbook is opened in append mode with openpyxl.  With
    ``if_sheet_exists="overlay"`` the frame is written on top of the current
    contents of the ``Data`` sheet instead of replacing the sheet.
    NOTE(review): cells outside the written range are presumably left as they
    are, so stale rows could remain if the sheet previously held more data --
    confirm against the template.

    Args:
        latestdownload: Unused; kept so existing callers keep working.
        currentDash: Path of the existing ``.xlsx`` workbook to update.
        copiedData: DataFrame to write (its index is not written).
    """
    with pd.ExcelWriter(
        currentDash,
        mode="a",
        engine="openpyxl",
        if_sheet_exists="overlay",
    ) as writer:
        # No ``engine`` argument here: the writer above already fixes the
        # engine, and xlsxwriter could not append to an existing file anyway.
        copiedData.to_excel(writer, sheet_name="Data", index=False)


def cleanitUp(currentDash):
    """Read the ``Data`` sheet, drop its first column and print the result.

    Args:
        currentDash: Path of the workbook to read.

    Returns:
        The DataFrame read from the ``Data`` sheet without its first column.
    """
    cleanExcel = pd.read_excel(currentDash, sheet_name="Data", index_col=None)
    # Slice by position instead of renaming the first column through
    # ``columns.values`` (an in-place write to an immutable Index) and then
    # dropping it by label, which would also remove a real "tmp" column.
    cleanExcel = cleanExcel.iloc[:, 1:]
    print(cleanExcel)
    return cleanExcel


# When run as a script, build today's workbooks from the module-level paths.
if __name__ == "__main__":
    copytemplate(rootdir, basefile)

# TODO: The dataset is too large. Move the math from the Excel workbook into
# this script so that the workbook only references the final, much smaller
# result set. The math, as far as I can tell, is described below.
"""
Math:
    Courses:
        Enrolled: For each unique course name, count number of "Enrolled" (Col. C) fields if != null.
        Started: For each unique course name, count number of "Attempt Start" (Col. D) fields if != null.
        Completed: For each unique course name, count number of "Progress" (Col. G) fields if == 100%
    Activity Completions:
        Started first activity == Started number above in Course
        Activity >= 1 Completions:
"""
walmart script 2022-10-31 10:47:43 -04:00			`import os`
			`from datetime import date`
			`import glob`
Walmart snafu with too much data. Some notes and other small changes. 2023-02-07 18:07:54 -05:00			`import re`
walmart script 2022-10-31 10:47:43 -04:00			`import shutil`
			`import pandas as pd`

			`rootdir = "/Users/normrasmussen/Documents/Resources/Walmart/"`
Walmart script in production! And more notes. 2023-02-01 18:11:39 -05:00			`downloadir = (`
			`"/Users/normrasmussen/Google Drive/My Drive/Shared with Clients/Walmart_Looker/"`
			`)`
Swift Templates, Talkspace, Notes 2022-12-22 18:15:51 -05:00			`basefile = "Walmart_Weekly_Base.xlsx"`

walmart script 2022-10-31 10:47:43 -04:00
			`def copytemplate(rootdir, basefile):`
Walmart snafu with too much data. Some notes and other small changes. 2023-02-07 18:07:54 -05:00			`segments = ["Group1", "Group2", "Group3", "Group4", "Group5"]`
walmart script 2022-10-31 10:47:43 -04:00			`today = date.today()`
			`today = today.strftime("%m.%d.%Y")`
Swift Templates, Talkspace, Notes 2022-12-22 18:15:51 -05:00			`template = rootdir + basefile`
Walmart snafu with too much data. Some notes and other small changes. 2023-02-07 18:07:54 -05:00			`listfiles = glob.glob(downloadir + "WeeklyMCA*.csv")`
Swift Templates, Talkspace, Notes 2022-12-22 18:15:51 -05:00
Walmart snafu with too much data. Some notes and other small changes. 2023-02-07 18:07:54 -05:00			`if os.path.exists(rootdir):`
			`for segment in segments:`
			`todayFile = f"Walmart-{segment}-{today}.xlsx"`
			`shutil.copy2(template, rootdir + todayFile)`
			`currentDash = rootdir + f"Walmart-{segment}-{today}.xlsx"`
			`for latestdownload in listfiles:`
			`if re.search('(?:Group1)', currentDash) and re.search('(?:Group1)', latestdownload):`
			`copytoDash(latestdownload, currentDash)`
			`elif re.search('(?:Group2)', currentDash) and re.search('(?:Group2)', latestdownload):`
			`copytoDash(latestdownload, currentDash)`
			`elif re.search('(?:Group3)', currentDash) and re.search('(?:Group3)', latestdownload):`
			`copytoDash(latestdownload, currentDash)`
			`elif re.search('(?:Group4)', currentDash) and re.search('(?:Group4)', latestdownload):`
			`copytoDash(latestdownload, currentDash)`
			`elif re.search('(?:Group5)', currentDash) and re.search('(?:Group5)', latestdownload):`
			`copytoDash(latestdownload, currentDash)`
			`else:`
			`print("No matching Looker Pulls or Templates Found!")`
			`# TODO: Update these if statements to loop through the groups in a better fashion`
walmart script 2022-10-31 10:47:43 -04:00
Swift Templates, Talkspace, Notes 2022-12-22 18:15:51 -05:00
walmart script 2022-10-31 10:47:43 -04:00			`def copytoDash(latestdownload, currentDash):`
new JJSV templates & walmart script is being annoying 2022-11-01 15:19:04 -04:00			`readExport = pd.read_csv(`
Swift Templates, Talkspace, Notes 2022-12-22 18:15:51 -05:00			`latestdownload,`
			`index_col=False,`
			`header=0,`
			`low_memory=False,`
			`# dtype={"Progress": float},`
			`)`
some notes 2022-10-31 15:36:34 -04:00			`print(readExport)`
Walmart script in production! And more notes. 2023-02-01 18:11:39 -05:00			`readExport["Progress"] = readExport["Progress"].str[:-1].apply(pd.to_numeric)`
Swift Templates, Talkspace, Notes 2022-12-22 18:15:51 -05:00			`# readExport['Progress'].apply(pd.to_numeric, errors='ignore')`
Walmart script in production! And more notes. 2023-02-01 18:11:39 -05:00			`print(readExport["Progress"])`
Swift Templates, Talkspace, Notes 2022-12-22 18:15:51 -05:00			`readExport.drop(readExport.filter(regex="Unname"), axis=1, inplace=True)`
walmart script 2022-10-31 10:47:43 -04:00			`copiedData = readExport.copy()`
some notes 2022-10-31 15:36:34 -04:00			`bringtoExcel(latestdownload, currentDash, copiedData)`
walmart script 2022-10-31 10:47:43 -04:00
Swift Templates, Talkspace, Notes 2022-12-22 18:15:51 -05:00
some notes 2022-10-31 15:36:34 -04:00			`def bringtoExcel(latestdownload, currentDash, copiedData):`
walmart script 2022-10-31 10:47:43 -04:00			`with pd.ExcelWriter(`
Swift Templates, Talkspace, Notes 2022-12-22 18:15:51 -05:00			`currentDash,`
			`mode="a",`
			`engine="openpyxl",`
			`if_sheet_exists="overlay",`
			`# engine_kwargs={'options': {'strings_to_numbers': True}}`
walmart script 2022-10-31 10:47:43 -04:00			`) as writer:`
			`copiedData.to_excel(`
Swift Templates, Talkspace, Notes 2022-12-22 18:15:51 -05:00			`writer,`
			`engine="xlsxwriter",`
			`sheet_name="Data",`
			`index=False,`
			`)`

walmart script 2022-10-31 10:47:43 -04:00
			`def cleanitUp(currentDash):`
			`cleanExcel = pd.read_excel(currentDash, sheet_name="Data", index_col=None)`
			`cleanExcel.columns.values[0] = "tmp"`
			`cleanExcel.drop(columns="tmp", axis=1, inplace=True)`
			`print(cleanExcel)`

Swift Templates, Talkspace, Notes 2022-12-22 18:15:51 -05:00
walmart script 2022-10-31 10:47:43 -04:00			`if __name__ == "__main__":`
			`copytemplate(rootdir, basefile)`
Walmart snafu with too much data. Some notes and other small changes. 2023-02-07 18:07:54 -05:00
			`# TODO: Dataset is too large. Add the math from the excel into the script so that the dataset references the final data and a much smaller set. Commented below is the math as far as I can tell.`
			`"""`
			`Math:`
			`Courses:`
			`Enrolled: For each unique course name, count number of "Enrolled" (Col. C) fields if != null.`
			`Started: For each unique course name, count number of "Attempt Start" (Col. D) fields if != null.`
			`Completed: For each unique course name, count number of "Progress" (Col. G) fields if == 100%`
			`Activity Completions:`
			`Started first activity == Started number above in Course`
			`Activity >= 1 Completions:`
			`"""`