Anthology script for domain names. Used the get-courses script for Walmart, and made a backup of the screenshots script for one-off courses. Glassdoor and to-do notes.
This commit is contained in:
@ -133,3 +133,9 @@ Remaining Questions:
|
|||||||
* How to embed PX into their front end templates?
|
* How to embed PX into their front end templates?
|
||||||
* They measure traffic on help center - with PX?
|
* They measure traffic on help center - with PX?
|
||||||
* What are they searching for?
|
* What are they searching for?
|
||||||
|
|
||||||
|
## 03/11/2024
|
||||||
|
|
||||||
|
* Senior leadership is fully bought in on GD University being at the forefront of customer education.
|
||||||
|
* Unsure how to leverage more resources
|
||||||
|
* Need guidance on how the academy can be used at the right moment for strategic conversations between CSM and client.
|
||||||
|
|||||||
@ -4,7 +4,7 @@ import pprint
|
|||||||
import csv
|
import csv
|
||||||
|
|
||||||
pp = pprint.PrettyPrinter(indent=4)
|
pp = pprint.PrettyPrinter(indent=4)
|
||||||
APIKEY = Apikeys.CIN7
|
APIKEY = Apikeys.WALMARTPROD
|
||||||
HEADERS = {
|
HEADERS = {
|
||||||
"accept": "application/json",
|
"accept": "application/json",
|
||||||
"X-Api-Key": APIKEY,
|
"X-Api-Key": APIKEY,
|
||||||
@ -22,7 +22,7 @@ def get_courses():
|
|||||||
# for course_name in COURSES:
|
# for course_name in COURSES:
|
||||||
count += 1
|
count += 1
|
||||||
# url = f"https://api.northpass.com/v2/courses/?filter[name][eq]={course_name}"
|
# url = f"https://api.northpass.com/v2/courses/?filter[name][eq]={course_name}"
|
||||||
url = f"https://api.northpass.com/v2/courses/?limit=100&page={count}"
|
url = f"https://api2.northpass.com/v2/courses/?limit=100&page={count}"
|
||||||
response = requests.get(url, headers=HEADERS)
|
response = requests.get(url, headers=HEADERS)
|
||||||
response = response.json()
|
response = response.json()
|
||||||
nextlink = response["links"]
|
nextlink = response["links"]
|
||||||
@ -31,9 +31,9 @@ def get_courses():
|
|||||||
status = item['attributes']['status']
|
status = item['attributes']['status']
|
||||||
if status == 'live':
|
if status == 'live':
|
||||||
name = item['attributes']['name']
|
name = item['attributes']['name']
|
||||||
if "[Core]" in name:
|
idict = {item["attributes"]["name"]: item["id"]}
|
||||||
idict = {item["attributes"]["name"]: item["id"]}
|
list_of_ids.append(idict)
|
||||||
list_of_ids.append(idict)
|
# if "[Core]" in name:
|
||||||
|
|
||||||
if "next" not in nextlink:
|
if "next" not in nextlink:
|
||||||
break
|
break
|
||||||
@ -41,7 +41,7 @@ def get_courses():
|
|||||||
pp.pprint(list_of_ids)
|
pp.pprint(list_of_ids)
|
||||||
# print(len(list_of_ids))
|
# print(len(list_of_ids))
|
||||||
with open(
|
with open(
|
||||||
"/Users/normrasmussen/Downloads/Cin7-Courses.csv", "a+", newline='\n'
|
"/Users/normrasmussen/Downloads/Walmart-Live-Courses.csv", "a+", newline='\n'
|
||||||
) as csvfile:
|
) as csvfile:
|
||||||
for group in list_of_ids:
|
for group in list_of_ids:
|
||||||
for key, value in group.items():
|
for key, value in group.items():
|
||||||
|
|||||||
@ -5,6 +5,13 @@ for domain in DOMAINS:
|
|||||||
URL = f"https://{domain}"
|
URL = f"https://{domain}"
|
||||||
session = HTMLSession()
|
session = HTMLSession()
|
||||||
resp = session.get(URL)
|
resp = session.get(URL)
|
||||||
title = resp.html.find('head > title', first=True)
|
try:
|
||||||
group_name = title.text.split("|")[0]
|
title = resp.html.find('head > title', first=True)
|
||||||
print(group_name)
|
group_name = title.text.split("|")
|
||||||
|
print(group_name)
|
||||||
|
except AttributeError as e:
|
||||||
|
print(e)
|
||||||
|
finally:
|
||||||
|
title = resp.html.find('head > meta:nth-child(3)', first=True)
|
||||||
|
print(title.text)
|
||||||
|
|
||||||
|
|||||||
BIN
Scripts/Auto_Scrape_Screenshots/.DS_Store
vendored
BIN
Scripts/Auto_Scrape_Screenshots/.DS_Store
vendored
Binary file not shown.
@ -17,7 +17,7 @@ const getAllGroups = async (num) => {
|
|||||||
|
|
||||||
await axios({
|
await axios({
|
||||||
method: 'get',
|
method: 'get',
|
||||||
url: 'https://api2.northpass.com/v2/courses?page=${page}&limit=200',
|
url: 'https://api2.northpass.com/v2/courses?page=${page}&limit=100',
|
||||||
headers: {
|
headers: {
|
||||||
'accept': '*/*',
|
'accept': '*/*',
|
||||||
'x-api-key': apiKey,
|
'x-api-key': apiKey,
|
||||||
@ -83,17 +83,17 @@ async function courseOverview(id, i, num) {
|
|||||||
const resource = resourcetitle.trim();
|
const resource = resourcetitle.trim();
|
||||||
console.log(resource);
|
console.log(resource);
|
||||||
|
|
||||||
await page.screenshot({path: `${resource}_1.png`, fullPage:true, headless:"new"});
|
await page.screenshot({path: `${resource}_1.png`, fullPage:true });
|
||||||
|
|
||||||
const hrefs = await page.$$eval('a', as => as.map(a => a.href));
|
const hrefs = await page.$$eval('a', as => as.map(a => a.href));
|
||||||
var links = Object.entries(hrefs);
|
var links = Object.entries(hrefs);
|
||||||
console.log(links)
|
// console.log(links)
|
||||||
links.forEach(([key, value]) => {
|
links.forEach(([key, value]) => {
|
||||||
var link = value;
|
var link = value;
|
||||||
if (link.includes("activities") && (link.includes(id))) {
|
if (link.includes("activities") && (link.includes(id))) {
|
||||||
console.log("Activities Link")
|
// console.log("Activities Link")
|
||||||
console.log(link);
|
// console.log(link);
|
||||||
activity.push(link);
|
activity.push(link);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
setTimeout(() => {
|
setTimeout(() => {
|
||||||
@ -109,7 +109,7 @@ async function courseOverview(id, i, num) {
|
|||||||
await page.goto(newlink, {
|
await page.goto(newlink, {
|
||||||
waitUntil: 'load',
|
waitUntil: 'load',
|
||||||
timeout: 0 });
|
timeout: 0 });
|
||||||
await page.screenshot({path: `${resource}_${num}.png`, fullPage: true, headless:"new"});
|
await page.screenshot({path: `${resource}_${num}.png`, fullPage: true });
|
||||||
screenshots(newlink, resource, num, i);
|
screenshots(newlink, resource, num, i);
|
||||||
}
|
}
|
||||||
await browser.close();
|
await browser.close();
|
||||||
|
|||||||
95
Scripts/Auto_Scrape_Screenshots/single_resource.js
Normal file
95
Scripts/Auto_Scrape_Screenshots/single_resource.js
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
const puppeteer = require('puppeteer');
|
||||||
|
const axios = require('axios')
|
||||||
|
const path = require('path')
|
||||||
|
|
||||||
|
// Northpass API key, read from the NORTHPASS_API_KEY environment variable so
// that production secrets are never committed to source control. Set it to the
// Spark or Luminate production key as needed, and rotate any key that was
// previously checked in.
// NOTE(review): nothing visible in this script reads apiKey — confirm it is
// still needed.
const apiKey = process.env.NORTHPASS_API_KEY;

// Suffix appended to every course and activity URL before it is visited.
const uid = "/?uid=7beg87y4-fh24-4929-3rt5-24kdn87s5241";

// Ids appended to the course URL, visited in order starting at index 0.
const groupIds = [
  '855e2704-fd95-456f-a4eb-ce31c13b3072',
  'ba23909e-49fd-4458-81bc-99e9fe35faee',
  'efdfb1a5-58c8-4fd4-a31c-60946e73b6ed',
];
|
||||||
|
|
||||||
|
/**
 * Entry point: start processing at the first entry of the id list.
 *
 * @param {*} [num] - Unused; kept so existing call sites keep working.
 * @returns {Promise<*>} Resolves with whatever loopIds(0) returns, so a caller
 *   can await or catch it instead of the result being silently dropped.
 */
const getAllGroups = async (num) => {
  const firstIndex = 0;
  return loopIds(firstIndex);
};
|
||||||
|
|
||||||
|
/**
 * Look up the id at position `i` in groupIds and hand it to courseOverview.
 *
 * @param {number} i - Index into groupIds. An out-of-range index yields an
 *   undefined id, which is passed through unchanged.
 * @returns {*} Whatever courseOverview returns, so callers can await or catch
 *   it.
 */
function loopIds(i) {
  console.log("Loop Ids Function");
  // Declared locally: the bare assignment used before created an implicit
  // global and throws a ReferenceError in strict mode.
  const id = groupIds[i];
  // Screenshot numbering restarts at 1 for every id.
  const num = 1;
  return courseOverview(id, i, num);
}
|
||||||
|
|
||||||
|
/**
 * Screenshot one course overview page and each of its activity pages, then
 * continue with the next id by calling loopIds(i + 1).
 *
 * @param {string|undefined} id - Id appended to the course URL. When the
 *   resulting URL contains "undefined" the function logs and returns without
 *   launching a browser or calling loopIds.
 * @param {number} i - Index of `id`; loopIds is called with i + 1 on success.
 * @param {number} num - Number of the last screenshot already taken; activity
 *   screenshots are numbered num + 1, num + 2, ...
 * @returns {Promise<void>} Rejects if navigation or scraping fails. The
 *   browser is closed whether the run succeeds or fails.
 */
async function courseOverview(id, i, num) {
  const url = "https://walmart.northpass.com/app/courses/";
  const course = `${url}${id}${uid}`;
  console.log(course);

  // Checked before launching so no browser is started for a missing id.
  if (course.includes("undefined")) {
    console.log("Error - Undefined UUID. Possibly end of list. Exiting.");
    return;
  }

  const viewport = { width: 390, height: 844 };
  const navigation = { waitUntil: 'load', timeout: 0 };

  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.setViewport(viewport);
    await page.goto(course, navigation);

    // The page title is read from a fixed element; fail clearly if it is absent.
    const [titleNode] = await page.$x('/html/body/div[1]/div');
    if (!titleNode) {
      throw new Error(`Could not find the title element on ${course}`);
    }
    const resourcetitle = await page.evaluate((node) => node.innerText, titleNode);
    const resource = resourcetitle.trim();
    console.log(resource);

    await page.screenshot({ path: `${resource}_1.png`, fullPage: true });

    // Keep each activity link belonging to this id once, in page order.
    const hrefs = await page.$$eval('a', (anchors) => anchors.map((a) => a.href));
    const uniqueLinks = [
      ...new Set(
        hrefs.filter((href) => href.includes("activities") && href.includes(id)),
      ),
    ];

    let shotNumber = num;
    for (const link of uniqueLinks) {
      shotNumber += 1;
      const newlink = `${link}${uid}`;
      console.log("New Link: " + newlink);
      await page.setViewport(viewport);
      await page.goto(newlink, navigation);
      await page.screenshot({ path: `${resource}_${shotNumber}.png`, fullPage: true });
      // Awaited so a failure surfaces here instead of as an unhandled rejection.
      await screenshots(newlink, resource, shotNumber, i);
    }
  } finally {
    // Always release the browser, even when a step above throws.
    await browser.close();
  }

  loopIds(i + 1);
}
|
||||||
|
|
||||||
|
/**
 * Launch a browser, open a blank page, and close the browser again.
 *
 * None of the parameters are read; they are kept so existing call sites keep
 * working.
 * NOTE(review): this looks like a placeholder — confirm whether it should
 * capture anything.
 *
 * @param {string} newlink - Unused.
 * @param {string} resource - Unused.
 * @param {number} num - Unused.
 * @param {number} i - Unused.
 * @returns {Promise<void>} Rejects if the page cannot be opened; the browser
 *   is closed either way.
 */
async function screenshots(newlink, resource, num, i) {
  const browser = await puppeteer.launch();
  try {
    await browser.newPage();
  } finally {
    // Close even when newPage() fails so no browser process is leaked.
    await browser.close();
  }
}
|
||||||
|
|
||||||
|
// Script entry point: start processing with the first entry of groupIds.
getAllGroups();
|
||||||
|
|
||||||
Reference in New Issue
Block a user