Walmart screenshot script is finalized for Azure functions
This commit is contained in:
105
Scripts/Walmart/Spark/screenshots_to_pdf.py
Normal file
105
Scripts/Walmart/Spark/screenshots_to_pdf.py
Normal file
@ -0,0 +1,105 @@
|
||||
import re
|
||||
import sys
|
||||
from playwright.sync_api import sync_playwright, Playwright
|
||||
from PIL import Image
|
||||
import glob
|
||||
import os
|
||||
from datetime import date
|
||||
|
||||
|
||||
# Address prefix of the course pages; the course id is appended directly to it.
BASEURL = "https://walmart.northpass.com/app/courses/"

# Query-string suffix appended after the course id on the first page request.
# NOTE(review): this looks like an account-specific identifier committed to
# source control -- confirm whether it should come from configuration instead.
USERID = "?uid=7beg87y4-fh24-4929-3rt5-24kdn87s5241"

# Course to capture, read from the first command-line argument. Evaluated at
# import time, so a missing argument raises IndexError before anything runs.
COURSEID = sys.argv[1]

# String prefix used when writing screenshots, globbing for them, and building
# the PDF output path. It is concatenated as-is, hence the trailing slash.
DIR = "./"
|
||||
|
||||
def run(playwright: Playwright):
    """Screenshot the course title page and every page linked from its outline.

    Launches WebKit with a 390x844 viewport, opens
    ``BASEURL + COURSEID + USERID`` and saves a full-page PNG of that page and
    of each link found under ``.np-course-outline-content-section``. Files are
    written to ``{DIR}-{course_name}_{n}.png``, where ``n`` is 0 for the title
    page and counts up from 1 in outline order.

    The browser is closed even when a navigation or screenshot fails, so a
    failed run does not leave a browser process behind.

    Args:
        playwright: Playwright driver, as yielded by ``sync_playwright()``.
    """
    print("running")
    browser = playwright.webkit.launch()
    try:
        context = browser.new_context(viewport={"width": 390, "height": 844})
        page = context.new_page()

        # Open the title page and read the course name used in every file name.
        page.goto(f"{BASEURL}{COURSEID}{USERID}")
        course_name = page.locator(".header-title").evaluate(
            "node => node.innerText"
        )
        print(course_name)

        # Collect the outline links while still on the title page; the loop
        # below navigates away from it.
        course_links = page.locator(
            ".np-course-outline-content-section a"
        ).evaluate_all("elements => elements.map(el => el.href)")
        page.screenshot(path=f"{DIR}-{course_name}_0.png", full_page=True)

        print("Course outline links:")
        for screen_num, link in enumerate(course_links, start=1):
            print(f"{screen_num}. {link}")
            page.goto(link)
            page.screenshot(
                path=f"{DIR}-{course_name}_{screen_num}.png", full_page=True
            )
    finally:
        browser.close()
|
||||
|
||||
|
||||
def find_pictures(DIR):
    """Collect the PNG files matched by ``DIR + "*.png"`` and group them.

    Only the base names are kept (the directory part is dropped) before the
    list is passed, together with ``DIR``, to ``split_resources``.

    Args:
        DIR: String prefix that is concatenated with ``"*.png"`` for globbing.
    """
    png_names = [os.path.basename(found) for found in glob.glob(DIR + "*.png")]
    split_resources(png_names, DIR)
|
||||
|
||||
|
||||
def split_resources(files, DIR):
    """Group screenshot file names by course and build one PDF per group.

    Names are expected to look like ``<title>_<n>.png``. Files sharing the same
    ``<title>`` form one group, ordered by the numeric value of ``<n>`` so that
    ``_10`` follows ``_9`` rather than ``_1``. A name that does not follow the
    pattern becomes a single-file group titled with its name minus extension.

    Each group is passed to ``process_pictures(names, title, DIR)``. The
    ``files`` list is left unmodified, and an empty list is a no-op.

    Args:
        files: Base names of the PNG screenshots to group.
        DIR: Forwarded unchanged to ``process_pictures``.
    """
    groups = {}
    for name in files:
        # Greedy ``.*`` means the *last* ``_<digits>`` before ``.png`` is the
        # page index, so titles that contain underscores stay intact.
        match = re.fullmatch(r"(?P<title>.*)_(?P<index>\d+)\.png", name)
        if match:
            title, index = match["title"], int(match["index"])
        else:
            title, index = os.path.splitext(name)[0], 0
        groups.setdefault(title, []).append((index, name))

    for title, numbered in groups.items():
        # Sorting the (index, name) pairs orders pages numerically.
        ordered = [name for _, name in sorted(numbered)]
        process_pictures(ordered, title, DIR)
|
||||
|
||||
|
||||
def process_pictures(new_list, resource_title, DIR):
    """Combine one course's screenshots into a single multi-page PDF.

    The images are read from ``DIR``, converted to RGB, and written in list
    order to ``<DIR>/PDFs/<resource_title>_<MM.DD.YYYY>.pdf``. The ``PDFs``
    directory is created when it does not exist yet.

    Args:
        new_list: Base names of the PNG files, in the desired page order.
        resource_title: Title used for the PDF file name; every ``?`` is
            removed from it first.
        DIR: Directory that holds the PNG files and receives ``PDFs/``.
    """
    if not new_list:
        # Nothing to combine; avoid an IndexError on the first page.
        return

    resource_title = resource_title.replace("?", "")
    today = date.today().strftime("%m.%d.%Y")

    pages = []
    for picture in new_list:
        # convert() returns an independent copy, so the source file can be
        # closed right away instead of staying open until garbage collection.
        with Image.open(os.path.join(DIR, picture)) as image:
            pages.append(image.convert("RGB"))

    output_dir = os.path.join(DIR, "PDFs")
    os.makedirs(output_dir, exist_ok=True)

    first_page, *remaining_pages = pages
    first_page.save(
        os.path.join(output_dir, f"{resource_title}_{today}.pdf"),
        save_all=True,
        append_images=remaining_pages,
    )
|
||||
|
||||
|
||||
def delete_originals(DIR):
    """Delete every path matched by ``DIR + "*.png"``, best effort.

    A path that cannot be removed (for example a directory, a file that is
    already gone, or one without permission) is reported on stdout and
    skipped, so one failure does not stop the remaining deletions.

    Args:
        DIR: String prefix that is concatenated with ``"*.png"`` for globbing.
    """
    for file in glob.glob(DIR + "*.png"):
        try:
            os.remove(file)
        except OSError as e:
            # os.remove reports failures as OSError subclasses.
            print("Error!")
            print(e)
    print("All Done")
|
||||
|
||||
|
||||
# Script driver: capture the screenshots, bundle them into PDFs, then remove
# the intermediate PNG files.
with sync_playwright() as pw:
    run(pw)

find_pictures(DIR)
delete_originals(DIR)
|
||||
Reference in New Issue
Block a user