Update openapi.json

2024-11-16 11:42:24 +08:00 · 2024-06-10 18:26:25 -07:00 · 2024-06-10 18:26:25 -07:00 · 9390816c1b
commit 9390816c1b
parent 149d79a529
1 changed file with 28 additions and 1 deletion
--- a/apps/api/openapi.json
+++ b/apps/api/openapi.json
@@ -51,10 +51,19 @@
                        "description": "Include the raw HTML content of the page. Will output a html key in the response.",
                        "default": false
                      },
+                      "screenshot": {
+                        "type": "boolean",
+                        "description": "Include a screenshot of the top of the page that you are scraping.",
+                        "default": false
+                      },
                      "waitFor": {
                        "type": "integer",
                        "description": "Wait x amount of milliseconds for the page to load to fetch content",
                        "default": 0
+                      },
+                      "headers": {
+                        "type": "object",
+                        "description": "Headers to send with the request. Can be used to send cookies, user-agent, etc."
                      }
                    }
                  },
@@ -176,6 +185,11 @@
                        "description": "The crawling mode to use. Fast mode crawls 4x faster websites without sitemap, but may not be as accurate and shouldn't be used in heavy js-rendered websites.",
                        "default": "default"
                      },
+                      "ignoreSitemap": {
+                        "type": "boolean",
+                        "description": "Ignore the website sitemap when crawling",
+                        "default": false
+                      },
                      "limit": {
                        "type": "integer",
                        "description": "Maximum number of pages to crawl",
@@ -195,6 +209,15 @@
                        "type": "boolean",
                        "description": "Include the raw HTML content of the page. Will output a html key in the response.",
                        "default": false
+                      },
+                      "screenshot": {
+                        "type": "boolean",
+                        "description": "Include a screenshot of the top of the page that you are scraping.",
+                        "default": false
+                      },
+                      "headers": {
+                        "type": "object",
+                        "description": "Headers to send with the request when scraping. Can be used to send cookies, user-agent, etc."
                      }
                    }
                  }
@@ -368,7 +391,7 @@
                      "items": {
                        "$ref": "#/components/schemas/CrawlStatusResponseObj"
                      },
-                      "description": "Partial documents returned as it is being crawls (streaming). When a page is ready it will append to the parial_data array - so no need to wait for all the website to be crawled."
+                      "description": "Partial documents returned while the website is being crawled (streaming). **This feature is currently in alpha - expect breaking changes.** When a page is ready, it is appended to the partial_data array, so there is no need to wait for the entire website to be crawled. The array in the response holds a maximum of 50 items; when a new item is added, the oldest item (top of the array) is removed."
                    }
                  }
                }
@@ -513,6 +536,10 @@
            "nullable": true,
            "description": "Raw HTML content of the page if `includeHtml`  is true"
          },
+          "index": {
+            "type": "integer",
+            "description": "The number of the page that was crawled. This is useful with `partial_data`, so you know which page the data is from."
+          },
          "metadata": {
            "type": "object",
            "properties": {