Figured out most of the data points I would like. Need to clean it up and figure out how to add it to a database.

2023-02-20 21:35:44 -05:00
parent 4fb10c8ff3
commit 37f6311d4b
6 changed files with 221 additions and 208 deletions
--- a/apiscraper/spiders/dev_spider.py
+++ b/apiscraper/spiders/dev_spider.py
@ -8,7 +8,7 @@ class ApiSpider(scrapy.Spider):

    def start_requests(self):
        urls = [
-                "https://developers.northpass.com/reference/get_v2-activities",
+                "https://developers.northpass.com/reference/get_v2-activities"
                ]
        for url in urls:
            yield scrapy.Request(url=url, meta=dict(
@ -27,13 +27,25 @@ class ApiSpider(scrapy.Spider):

        end_item = EndpointItems()
        end_item['inclusions'] = []
-        end_item['params'] = []
+        end_item['req_param'] = []
        for item in response.xpath('//*[@id="Explorer"]'):
            end_item['title'] = response.xpath('//*[@id="content"]/header[1]/div[1]/h1/text()').get()
+
            end_item['method'] = response.xpath('//*[@id="content"]/header[1]/div[2]/span[1]/text()').get()
-            end_item['endpoint'] = response.xpath('//*[@id="content"]/header[1]/div[2]/span[2]/text()[2]').get()
+
+            end_item['endpoint'] = response.xpath('//*[@id="content"]/header[1]/div[2]/span[2]').xpath("@title").extract()
+
+            end_item['req_param'] = response.xpath('//*[@id="content"]/header[1]/div[2]/span[2]/label/text()').get()
+            required = response.xpath('//*[@id="path-getV2GroupsGroup_uuidMemberships"]/div/div[1]/div/div[2]/text()').get()
+            if required is not None:
+                if "required" in required:
+                    req_param = end_item['req_param']
+                else:
+                    pass
+            else:
+                req_param = ""
+
            end_item['inclusions'].append(response.xpath('//*[starts-with(@id,"query")]/div/p/code/span[position() >= 1 and not(position() > 15)]/text()').get())
-            #end_item['params'].append(response.xpath('//*[starts-with(@id,"path"]/div/div[1]/div/label/text()').get())
            if end_item['inclusions'] is not None:
                inclusions = end_item['inclusions']
            else:
@ -43,6 +55,7 @@ class ApiSpider(scrapy.Spider):
                    end_item['title'] : {
                        'method' : end_item['method'],
                        'endpoint' : end_item['endpoint'],
+                        'req_param' : req_param,
                        'inclusions' : inclusions
                        }
                    }
@ -53,7 +66,7 @@ class ApiSpider(scrapy.Spider):
        self.log(northpass_endpoints)

        next_page = response.xpath('//*[@id="content"]/div[4]/nav/a[2]/@href').get()
-        self.log(f"The next page is {next_page}")
+        # self.log(f"The next page is {next_page}")

        if next_page is not None:
            nexturl = response.urljoin(next_page)