commit 4fb10c8ff34a32c7efd2394b60af6c5894e88038
Author: Normanras <44226464+Normanras@users.noreply.github.com>
Date:   Mon Feb 20 14:41:34 2023 -0500

    first commit

diff --git a/apiscraper/__init__.py b/apiscraper/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/apiscraper/__pycache__/__init__.cpython-311.pyc b/apiscraper/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000..ac94e95
Binary files /dev/null and b/apiscraper/__pycache__/__init__.cpython-311.pyc differ
diff --git a/apiscraper/__pycache__/items.cpython-311.pyc b/apiscraper/__pycache__/items.cpython-311.pyc
new file mode 100644
index 0000000..1ee7fe6
Binary files /dev/null and b/apiscraper/__pycache__/items.cpython-311.pyc differ
diff --git a/apiscraper/__pycache__/pipelines.cpython-311.pyc b/apiscraper/__pycache__/pipelines.cpython-311.pyc
new file mode 100644
index 0000000..03d8bd3
Binary files /dev/null and b/apiscraper/__pycache__/pipelines.cpython-311.pyc differ
diff --git a/apiscraper/__pycache__/settings.cpython-311.pyc b/apiscraper/__pycache__/settings.cpython-311.pyc
new file mode 100644
index 0000000..daf1843
Binary files /dev/null and b/apiscraper/__pycache__/settings.cpython-311.pyc differ
diff --git a/apiscraper/items.py b/apiscraper/items.py
new file mode 100644
index 0000000..1786649
--- /dev/null
+++ b/apiscraper/items.py
@@ -0,0 +1,10 @@
+import scrapy
+from scrapy.item import Item, Field
+
+
+class EndpointItems(scrapy.Item):  # fields for one API reference page
+    title = scrapy.Field()  # page heading text
+    method = scrapy.Field()  # HTTP verb shown in the page header
+    endpoint = scrapy.Field()  # endpoint path shown next to the verb
+    inclusions = scrapy.Field()  # list of text scraped from the "query" code spans
+    params = scrapy.Field()  # list; the spider initialises it but never fills it
diff --git a/apiscraper/middlewares.py b/apiscraper/middlewares.py
new file mode 100644
index 0000000..444e8e8
--- /dev/null
+++ b/apiscraper/middlewares.py
@@ -0,0 +1,103 @@
+# Define here the models for your spider middleware
+#
+# See documentation in:
+# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
+
+from scrapy import signals
+
+# useful for handling different item types with a single interface
+from itemadapter import is_item, ItemAdapter
+
+
+class ApiscraperSpiderMiddleware:
+    """Pass-through spider middleware.
+
+    Every hook forwards what it receives unchanged, which is also how Scrapy
+    behaves when a hook is not defined at all.
+    """
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        """Create the middleware and subscribe it to ``spider_opened``."""
+        middleware = cls()
+        crawler.signals.connect(
+            middleware.spider_opened, signal=signals.spider_opened
+        )
+        return middleware
+
+    def process_spider_input(self, response, spider):
+        """Run for each response on its way into the spider.
+
+        Should return None or raise an exception.
+        """
+        return None
+
+    def process_spider_output(self, response, result, spider):
+        """Run on the results the spider returned for ``response``.
+
+        Must return an iterable of Request or item objects.
+        """
+        yield from result
+
+    def process_spider_exception(self, response, exception, spider):
+        """Run when the spider or a ``process_spider_input()`` raises.
+
+        Should return None or an iterable of Request or item objects.
+        """
+        return None
+
+    def process_start_requests(self, start_requests, spider):
+        """Run on the spider's start requests; must yield only requests."""
+        yield from start_requests
+
+    def spider_opened(self, spider):
+        """Log the name of the spider that was just opened."""
+        message = "Spider opened: %s" % spider.name
+        spider.logger.info(message)
+
+
+class ApiscraperDownloaderMiddleware:
+    """Pass-through downloader middleware.
+
+    Every hook forwards what it receives unchanged, which is also how Scrapy
+    behaves when a hook is not defined at all.
+    """
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        """Create the middleware and subscribe it to ``spider_opened``."""
+        middleware = cls()
+        crawler.signals.connect(
+            middleware.spider_opened, signal=signals.spider_opened
+        )
+        return middleware
+
+    def process_request(self, request, spider):
+        """Run for each request passing through the downloader middleware.
+
+        Must return None (continue processing the request), a Response, or a
+        Request, or raise IgnoreRequest, in which case the
+        ``process_exception()`` methods of installed middleware are called.
+        """
+        return None
+
+    def process_response(self, request, response, spider):
+        """Run on the response returned from the downloader.
+
+        Must return a Response object or a Request object, or raise
+        IgnoreRequest.
+        """
+        return response
+
+    def process_exception(self, request, exception, spider):
+        """Run when a download handler or a ``process_request()`` raises.
+
+        Must return None (continue processing the exception), or a Response
+        or Request, either of which stops the ``process_exception()`` chain.
+        """
+        return None
+
+    def spider_opened(self, spider):
+        """Log the name of the spider that was just opened."""
+        message = "Spider opened: %s" % spider.name
+        spider.logger.info(message)
diff --git a/apiscraper/pipelines.py b/apiscraper/pipelines.py
new file mode 100644
index 0000000..142ad54
--- /dev/null
+++ b/apiscraper/pipelines.py
@@ -0,0 +1,31 @@
+from itemadapter import ItemAdapter
+#import sqlite3
+
+
+class ApiscraperPipeline:
+    """Item pipeline that currently passes every item through unchanged."""
+
+    # Disabled SQLite persistence (needs the commented-out ``import sqlite3``).
+    # ITEM_PIPELINES is also commented out in settings.py, so nothing runs this.
+    # def __init__(self):
+    #     self.con = sqlite3.connect('api.db')
+    #     self.cur = self.con.cursor()
+    #     self.cur.execute("""
+    #         CREATE TABLE IF NOT EXISTS api(
+    #             title TEXT, method TEXT, endpoint TEXT, inclusions TEXT
+    #         )
+    #     """)
+
+    def process_item(self, item, spider):
+        """Return ``item`` unchanged."""
+        # Disabled SQLite insert, paired with the disabled __init__ above:
+        # self.cur.execute("""
+        #     INSERT INTO api (title, method, endpoint, inclusions) VALUES (?,?,?,?)
+        # """, (
+        #     item['title'],
+        #     item['method'],
+        #     item['endpoint'],
+        #     item['inclusions'],
+        # ))
+        # self.con.commit()
+        return item
diff --git a/apiscraper/settings.py b/apiscraper/settings.py
new file mode 100644
index 0000000..bed019d
--- /dev/null
+++ b/apiscraper/settings.py
@@ -0,0 +1,34 @@
+BOT_NAME = "apiscraper"
+
+NEWSPIDER_MODULE = "apiscraper.spiders"
+SPIDER_MODULES = [NEWSPIDER_MODULE]
+
+# Respect each site's robots.txt rules.
+ROBOTSTXT_OBEY = True
+
+# Settings required by scrapy-playwright: both schemes use its handler.
+DOWNLOAD_HANDLERS = dict.fromkeys(
+    ("http", "https"),
+    "scrapy_playwright.handler.ScrapyPlaywrightDownloadHandler",
+)
+PLAYWRIGHT_LAUNCH_OPTIONS = dict(headless=True)
+PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT = 100_000  # presumably ms; confirm
+
+# Pin settings whose default value is deprecated to a future-proof value.
+REQUEST_FINGERPRINTER_IMPLEMENTATION = "2.7"
+TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
+FEED_EXPORT_ENCODING = "utf-8"
+
+#ITEM_PIPELINES = {
+#        'apiscraper.pipelines.ApiscraperPipeline':10000,
+#        }
+
+#FEEDS = {
+#        'northpass_api.json': {
+            #'pages' : {
+#            'method' : 'string',
+#            'endpoint' : 'string',
+#            'headers' : 'headers',
+#            }
+#           }
+#        }
diff --git a/apiscraper/spiders/.DS_Store b/apiscraper/spiders/.DS_Store
new file mode 100644
index 0000000..5008ddf
Binary files /dev/null and b/apiscraper/spiders/.DS_Store differ
diff --git a/apiscraper/spiders/.null-ls_505405_dev_spider.py b/apiscraper/spiders/.null-ls_505405_dev_spider.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/apiscraper/spiders/.null-ls_505405_dev_spider.py
@@ -0,0 +1 @@
+
diff --git a/apiscraper/spiders/__init__.py b/apiscraper/spiders/__init__.py
new file mode 100644
index 0000000..ebd689a
--- /dev/null
+++ b/apiscraper/spiders/__init__.py
@@ -0,0 +1,4 @@
+# This package will contain the spiders of your Scrapy project
+#
+# Please refer to the documentation for information on how to create and manage
+# your spiders.
diff --git a/apiscraper/spiders/__pycache__/__init__.cpython-311.pyc b/apiscraper/spiders/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000..febcc10
Binary files /dev/null and b/apiscraper/spiders/__pycache__/__init__.cpython-311.pyc differ
diff --git a/apiscraper/spiders/__pycache__/dev_spider.cpython-311.pyc b/apiscraper/spiders/__pycache__/dev_spider.cpython-311.pyc
new file mode 100644
index 0000000..f61f678
Binary files /dev/null and b/apiscraper/spiders/__pycache__/dev_spider.cpython-311.pyc differ
diff --git a/apiscraper/spiders/api.db b/apiscraper/spiders/api.db
new file mode 100644
index 0000000..17954a5
Binary files /dev/null and b/apiscraper/spiders/api.db differ
diff --git a/apiscraper/spiders/dev_spider.py b/apiscraper/spiders/dev_spider.py
new file mode 100644
index 0000000..8940968
--- /dev/null
+++ b/apiscraper/spiders/dev_spider.py
@@ -0,0 +1,71 @@
+from pathlib import Path
+import scrapy
+from scrapy_playwright.page import PageMethod
+from apiscraper.items import EndpointItems
+
+class ApiSpider(scrapy.Spider):
+    """Walk the Northpass API reference and yield one record per endpoint page."""
+
+    name = "api"
+    nav_xpath = '//*[@id="content"]/div[4]/nav'  # holds the "next page" link
+
+    def _page_request(self, url):
+        """Build a Playwright-rendered request that waits for the page nav.
+
+        ``errback`` is a ``scrapy.Request`` argument; nested inside ``meta``
+        (as it was before) it is never called and failed pages stay open.
+        """
+        return scrapy.Request(
+            url=url,
+            meta={
+                "playwright": True,
+                "playwright_include_page": True,
+                "playwright_page_methods": [
+                    PageMethod("wait_for_selector", self.nav_xpath),
+                ],
+            },
+            errback=self.errback,
+        )
+
+    def start_requests(self):
+        """Yield the request for the first reference page."""
+        yield self._page_request("https://developers.northpass.com/reference/get_v2-activities")
+
+    async def parse(self, response):
+        """Yield ``{title: {method, endpoint, inclusions}}``, then the next page."""
+        # Only the rendered HTML is needed, so release the browser page first.
+        page = response.meta.get("playwright_page")
+        if page is not None:
+            await page.close()
+
+        header = '//*[@id="content"]/header[1]'
+        # Page-absolute XPaths, so the loop only gates on the explorer element.
+        for _ in response.xpath('//*[@id="Explorer"]'):
+            title = response.xpath(f"{header}/div[1]/h1/text()").get()
+            # NOTE(review): .get() keeps only the first matching span; switch to
+            # .getall() if every inclusion token is wanted.
+            inclusion = response.xpath(
+                '//*[starts-with(@id,"query")]/div/p/code'
+                "/span[position() >= 1 and not(position() > 15)]/text()"
+            ).get()
+            endpoint_record = {
+                title: {
+                    "method": response.xpath(f"{header}/div[2]/span[1]/text()").get(),
+                    "endpoint": response.xpath(f"{header}/div[2]/span[2]/text()[2]").get(),
+                    # Drop a missing match instead of storing [None].
+                    "inclusions": [] if inclusion is None else [inclusion],
+                }
+            }
+            self.log(endpoint_record)
+            yield endpoint_record  # no trailing empty EndpointItems() any more
+
+        next_page = response.xpath(f"{self.nav_xpath}/a[2]/@href").get()
+        self.log(f"The next page is {next_page}")
+        if next_page is not None:
+            yield self._page_request(response.urljoin(next_page))
+
+    async def errback(self, failure):
+        """Close the Playwright page of a failed request, if one was opened."""
+        page = failure.request.meta.get("playwright_page")
+        if page is not None:
+            await page.close()
diff --git a/apiscraper/spiders/outputtest.json b/apiscraper/spiders/outputtest.json
new file mode 100644
index 0000000..f0331a9
--- /dev/null
+++ b/apiscraper/spiders/outputtest.json
@@ -0,0 +1,202 @@
+[
+{"List activities": {"method": "get", "endpoint": "/v2/activities"}},
+{},
+{"Fetch an activity": {"method": "get", "endpoint": "/v2/activities/"}},
+{},
+{"BambooHR configuration": {"method": "get", "endpoint": "/v2/apps/bamboo_hr"}},
+{},
+{"Update BambooHR configuration": {"method": "put", "endpoint": "/v2/apps/bamboo_hr"}},
+{},
+{"Delete BambooHR configuration": {"method": "delete", "endpoint": "/v2/apps/bamboo_hr"}},
+{},
+{"List assignment submissions": {"method": "get", "endpoint": "/v2/assignments/"}},
+{},
+{"Fetch an assignment submission": {"method": "get", "endpoint": "/v2/assignments/"}},
+{},
+{"List all assignments": {"method": "get", "endpoint": "/v2/assignments"}},
+{},
+{"Bulk add courses to groups": {"method": "post", "endpoint": "/v2/bulk/groups/courses"}},
+{},
+{"Bulk create groups for a school": {"method": "post", "endpoint": "/v2/bulk/groups"}},
+{},
+{"Bulk add people to groups": {"method": "post", "endpoint": "/v2/bulk/people/membership"}},
+{},
+{"Bulk enroll people to courses [deprecated]": {"method": "post", "endpoint": "/v2/bulk/enrollments"}},
+{},
+{"Bulk enroll people to courses": {"method": "post", "endpoint": "/v2/bulk/people/course"}},
+{},
+{"Bulk invite people to a school": {"method": "post", "endpoint": "/v2/bulk/people"}},
+{},
+{"Bulk resend school inviation to people": {"method": "post", "endpoint": "/v2/bulk/people/resend_invitation"}},
+{},
+{"List categories": {"method": "get", "endpoint": "/v2/categories"}},
+{},
+{"Create a category": {"method": "post", "endpoint": "/v2/categories"}},
+{},
+{"Fetch a category": {"method": "get", "endpoint": "/v2/categories/"}},
+{},
+{"Update a category": {"method": "put", "endpoint": "/v2/categories/"}},
+{},
+{"Delete a category": {"method": "delete", "endpoint": "/v2/categories/"}},
+{},
+{"Resend communication": {"method": "post", "endpoint": "/v2/communications/deliveries/"}},
+{},
+{"List deliveries": {"method": "get", "endpoint": "/v2/communications/deliveries/"}},
+{},
+{"Update attendance confirmation notification": {"method": "put", "endpoint": "/v2/communications/emails/attendance_confirmation"}},
+{},
+{"Update courses incomplete notification": {"method": "put", "endpoint": "/v2/communications/emails/courses_incomplete_notification"}},
+{},
+{"Update new courses notification": {"method": "put", "endpoint": "/v2/communications/emails/new_courses_notification"}},
+{},
+{"Update training session registration notification": {"method": "put", "endpoint": "/v2/communications/emails/training_session_registration_confirmation"}},
+{},
+{"List activities for a course": {"method": "get", "endpoint": "/v2/courses/"}},
+{},
+{"List groups not yet associated with course": {"method": "get", "endpoint": "/v2/courses/"}},
+{},
+{"List people not yet enrolled with course": {"method": "get", "endpoint": "/v2/courses/"}},
+{},
+{"List enrollments for a course": {"method": "get", "endpoint": "/v2/courses/"}},
+{},
+{"Retake a course": {"method": "post", "endpoint": "/v2/courses/"}},
+{},
+{"List courses": {"method": "get", "endpoint": "/v2/courses"}},
+{},
+{"List credential achievements": {"method": "get", "endpoint": "/v2/credentials/"}},
+{},
+{"List credential courses": {"method": "get", "endpoint": "/v2/credentials/"}},
+{},
+{"Delete course credential": {"method": "delete", "endpoint": "/v2/credentials/"}},
+{},
+{"List credentials": {"method": "get", "endpoint": "/v2/credentials"}},
+{},
+{"Delete a credential": {"method": "delete", "endpoint": "/v2/credentials/"}},
+{},
+{"List custom templates": {"method": "get", "endpoint": "/v2/custom_templates"}},
+{},
+{"Create a custom template": {"method": "post", "endpoint": "/v2/custom_templates"}},
+{},
+{"Delete a custom template": {"method": "delete", "endpoint": "/v2/custom_templates/"}},
+{},
+{"List email senders": {"method": "get", "endpoint": "/v2/email_domains/"}},
+{},
+{"Update an email sender": {"method": "put", "endpoint": "/v2/email_domains/"}},
+{},
+{"Delete an email sender": {"method": "delete", "endpoint": "/v2/email_domains/"}},
+{},
+{"List email domains": {"method": "get", "endpoint": "/v2/email_domains"}},
+{},
+{"Create an email domain": {"method": "post", "endpoint": "/v2/email_domains"}},
+{},
+{"Delete an email domain": {"method": "delete", "endpoint": "/v2/email_domains/"}},
+{},
+{"List events": {"method": "get", "endpoint": "/v2/events"}},
+{},
+{"List group's courses": {"method": "get", "endpoint": "/v2/groups/"}},
+{},
+{"List group's memberships": {"method": "get", "endpoint": "/v2/groups/"}},
+{},
+{"Add courses to a group": {"method": "post", "endpoint": "/v2/groups/"}},
+{},
+{"Add people to a group": {"method": "post", "endpoint": "/v2/groups/"}},
+{},
+{"List groups": {"method": "get", "endpoint": "/v2/groups"}},
+{},
+{"Create a group": {"method": "post", "endpoint": "/v2/groups"}},
+{},
+{"Fetch a group": {"method": "get", "endpoint": "/v2/groups/"}},
+{},
+{"Update a group": {"method": "put", "endpoint": "/v2/groups/"}},
+{},
+{"Delete a group": {"method": "delete", "endpoint": "/v2/groups/"}},
+{},
+{"Deactivate a person": {"method": "post", "endpoint": "/v2/people/"}},
+{},
+{"Reactivate a person": {"method": "delete", "endpoint": "/v2/people/"}},
+{},
+{"Associate a person with a course": {"method": "post", "endpoint": "/v2/people/"}},
+{},
+{"Remove a person from a course": {"method": "delete", "endpoint": "/v2/people/"}},
+{},
+{"Add groups to a person": {"method": "post", "endpoint": "/v2/people/"}},
+{},
+{"Remove a person from a group": {"method": "delete", "endpoint": "/v2/people/"}},
+{},
+{"List people": {"method": "get", "endpoint": "/v2/people"}},
+{},
+{"Create a person": {"method": "post", "endpoint": "/v2/people"}},
+{},
+{"Fetch a person": {"method": "get", "endpoint": "/v2/people/"}},
+{},
+{"Update a person": {"method": "put", "endpoint": "/v2/people/"}},
+{},
+{"Delete a person": {"method": "delete", "endpoint": "/v2/people/"}},
+{},
+{"List question banks": {"method": "get", "endpoint": "/v2/question_banks"}},
+{},
+{"Create a question bank": {"method": "post", "endpoint": "/v2/question_banks"}},
+{},
+{"Fetch a question bank": {"method": "get", "endpoint": "/v2/question_banks/"}},
+{},
+{"List learner quiz answers": {"method": "get", "endpoint": "/v2/quiz_attempts/"}},
+{},
+{"List quizzes": {"method": "get", "endpoint": "/v2/quizzes"}},
+{},
+{"List all assignment submissions": {"method": "get", "endpoint": "/v2/submissions"}},
+{},
+{"List all configured webhook endpoints": {"method": "get", "endpoint": "/v2/webhook_endpoints"}},
+{},
+{"Creates new webhook endpoint": {"method": "post", "endpoint": "/v2/webhook_endpoints"}},
+{},
+{"Updates webhook endpoint": {"method": "put", "endpoint": "/v2/webhook_endpoints/"}},
+{},
+{"Delete a webhook endpoint": {"method": "delete", "endpoint": "/v2/webhook_endpoints/"}},
+{},
+{"List all sent webhooks": {"method": "get", "endpoint": "/v2/webhooks"}},
+{},
+{"Bulk fill courses property values": {"method": "post", "endpoint": "/v2/properties/courses/bulk"}},
+{},
+{"Bulk clear courses property values": {"method": "delete", "endpoint": "/v2/properties/courses/bulk"}},
+{},
+{"Bulk fill people property values": {"method": "post", "endpoint": "/v2/properties/people/bulk"}},
+{},
+{"Bulk clear people property values": {"method": "delete", "endpoint": "/v2/properties/people/bulk"}},
+{},
+{"List properties available for courses": {"method": "get", "endpoint": "/v2/properties/courses/properties"}},
+{},
+{"Get available course properties with values": {"method": "get", "endpoint": "/v2/properties/courses/"}},
+{},
+{"List created property definitions": {"method": "get", "endpoint": "/v2/properties/conditional"}},
+{},
+{"Create conditional property with mapping": {"method": "post", "endpoint": "/v2/properties/conditional"}},
+{},
+{"Delete conditional property": {"method": "delete", "endpoint": "/v2/properties/conditional/"}},
+{},
+{"Update conditional property": {"method": "patch", "endpoint": "/v2/properties/conditional/"}},
+{},
+{"List properties available for people": {"method": "get", "endpoint": "/v2/properties/people/properties"}},
+{},
+{"Get available person properties with values": {"method": "get", "endpoint": "/v2/properties/people/"}},
+{},
+{"List unique values for property": {"method": "get", "endpoint": "/v2/properties/property_definitions/"}},
+{},
+{"List created property definitions": {"method": "get", "endpoint": "/v2/properties/property_definitions"}},
+{},
+{"Create property definition": {"method": "post", "endpoint": "/v2/properties/property_definitions"}},
+{},
+{"Get property definition": {"method": "get", "endpoint": "/v2/properties/property_definitions/"}},
+{},
+{"Delete property definition": {"method": "delete", "endpoint": "/v2/properties/property_definitions/"}},
+{},
+{"Create property definition": {"method": "patch", "endpoint": "/v2/properties/property_definitions/"}},
+{},
+{"List properties available for school": {"method": "get", "endpoint": "/v2/properties/school/properties"}},
+{},
+{"Get available school properties with values": {"method": "get", "endpoint": "/v2/properties/school"}},
+{},
+{"Clear school properties with values": {"method": "delete", "endpoint": "/v2/properties/school"}},
+{},
+{"Fill school properties with values": {"method": "patch", "endpoint": "/v2/properties/school"}},
+{}
+]
\ No newline at end of file
diff --git a/scrapy.cfg b/scrapy.cfg
new file mode 100644
index 0000000..5380221
--- /dev/null
+++ b/scrapy.cfg
@@ -0,0 +1,11 @@
+# Automatically created by: scrapy startproject
+#
+# For more information about the [deploy] section see:
+# https://scrapyd.readthedocs.io/en/latest/deploy.html
+
+[settings]
+default = apiscraper.settings
+
+[deploy]
+#url = http://localhost:6800/
+project = apiscraper