first commit
This commit is contained in:
71
apiscraper/spiders/dev_spider.py
Normal file
71
apiscraper/spiders/dev_spider.py
Normal file
@ -0,0 +1,71 @@
|
||||
from pathlib import Path
|
||||
import scrapy
|
||||
from scrapy_playwright.page import PageMethod
|
||||
from apiscraper.items import EndpointItems
|
||||
|
||||
class ApiSpider(scrapy.Spider):
    """Crawl the Northpass API reference, one endpoint page at a time.

    Every request is rendered through scrapy-playwright and asks for the
    Playwright page object to be attached to the response, so the callbacks
    are responsible for closing that page. ``parse`` follows the "next" link
    in the page's navigation until there is none left.
    """

    name = "api"

    # XPath of the pagination <nav>. Requests wait for it to appear before the
    # rendered response is handed to ``parse``; ``parse`` reads the next-page
    # link from inside it.
    _NAV_XPATH = '//*[@id="content"]/div[4]/nav'

    def _playwright_request(self, url):
        """Build a Playwright-rendered request for *url*.

        ``errback`` is passed to ``scrapy.Request`` itself. Placing it inside
        ``meta`` only stores it as an unused meta key, which leaves the
        Playwright page open whenever a request fails.
        """
        return scrapy.Request(
            url=url,
            meta=dict(
                playwright=True,
                playwright_include_page=True,
                playwright_page_methods=[
                    PageMethod('wait_for_selector', self._NAV_XPATH),
                ],
            ),
            errback=self.errback,
        )

    def start_requests(self):
        """Yield the initial request(s) that seed the crawl."""
        urls = [
            "https://developers.northpass.com/reference/get_v2-activities",
        ]
        for url in urls:
            yield self._playwright_request(url)

    async def parse(self, response):
        """Extract the endpoint summary from one page and queue the next page.

        Yields:
            A dict ``{title: {'method', 'endpoint', 'inclusions'}}`` and an
            ``EndpointItems`` instance for each ``#Explorer`` element found,
            followed by a request for the next page when a next link exists.
        """
        # The rendered HTML is already in ``response``; close the browser page
        # right away so pages do not accumulate across the crawl.
        page = response.meta["playwright_page"]
        await page.close()

        # Pre-initialised so the log call below works even when the page has
        # no ``#Explorer`` element.
        northpass_endpoints = {}

        end_item = EndpointItems()
        end_item['inclusions'] = []
        end_item['params'] = []

        # The element itself is not used; the fields are read with
        # document-level XPaths on ``response``.
        for _ in response.xpath('//*[@id="Explorer"]'):
            end_item['title'] = response.xpath('//*[@id="content"]/header[1]/div[1]/h1/text()').get()
            end_item['method'] = response.xpath('//*[@id="content"]/header[1]/div[2]/span[1]/text()').get()
            end_item['endpoint'] = response.xpath('//*[@id="content"]/header[1]/div[2]/span[2]/text()[2]').get()

            # ``.get()`` returns None when nothing matches; do not store that
            # as if it were an inclusion value.
            inclusion = response.xpath('//*[starts-with(@id,"query")]/div/p/code/span[position() >= 1 and not(position() > 15)]/text()').get()
            if inclusion is not None:
                end_item['inclusions'].append(inclusion)
            # TODO: path parameters are not extracted yet; 'params' stays empty.

            northpass_endpoints = {
                end_item['title']: {
                    'method': end_item['method'],
                    'endpoint': end_item['endpoint'],
                    'inclusions': end_item['inclusions'],
                },
            }

            yield northpass_endpoints
            # NOTE(review): this emits a new, empty item rather than
            # ``end_item``. Kept as-is to preserve the current output;
            # confirm whether ``end_item`` was intended.
            yield EndpointItems()

        self.log(northpass_endpoints)

        next_page = response.xpath('//*[@id="content"]/div[4]/nav/a[2]/@href').get()
        self.log(f"The next page is {next_page}")

        if next_page is not None:
            yield self._playwright_request(response.urljoin(next_page))

    async def errback(self, failure):
        """Close the Playwright page of a failed request, if one was attached."""
        # The page may be missing when the request failed before
        # scrapy-playwright attached it, so avoid a KeyError here.
        page = failure.request.meta.get("playwright_page")
        if page is not None:
            await page.close()
|
||||
Reference in New Issue
Block a user