Merge branch 'main' into f/rust-sdk

Commit eec6d86802 by Rafael Miller, 2024-08-29 16:20:03 -03:00, committed by GitHub (GPG key ID: B5690EEEBB952194).
163 changed files with 18655 additions and 4793 deletions.


@@ -22,16 +22,19 @@ env:
   SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }}
   SUPABASE_URL: ${{ secrets.SUPABASE_URL }}
   TEST_API_KEY: ${{ secrets.TEST_API_KEY }}
+  SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }}

 jobs:
   deploy:
     name: Deploy app
     runs-on: ubuntu-latest
+    timeout-minutes: 15
     steps:
       - uses: actions/checkout@v3
       - uses: superfly/flyctl-actions/setup-flyctl@master
-      - run: flyctl deploy --remote-only -a firecrawl-scraper-js
+      - run: flyctl deploy --remote-only -a firecrawl-scraper-js --build-secret SENTRY_AUTH_TOKEN=$SENTRY_AUTH_TOKEN
         working-directory: ./apps/api
         env:
           FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
           BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }}
+          SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }}
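For this workflow to resolve the new secret, SENTRY_AUTH_TOKEN has to exist as a repository Actions secret. A minimal sketch of provisioning it with the GitHub CLI (the token value is a placeholder, not part of this commit):

```bash
# Store the Sentry auth token as a GitHub Actions secret so that
# ${{ secrets.SENTRY_AUTH_TOKEN }} resolves when the workflow runs.
gh secret set SENTRY_AUTH_TOKEN --body "sntrys_placeholder_token"

# Confirm the secret is visible to workflows in this repository.
gh secret list
```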


@@ -27,6 +27,7 @@ env:
   PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
   NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
   CRATES_IO_TOKEN: ${{ secrets.CRATES_IO_TOKEN }}
+  SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }}

 jobs:
   pre-deploy-e2e-tests:
@@ -132,7 +133,7 @@ jobs:
         working-directory: ./apps/python-sdk
       - name: Run E2E tests for Python SDK
         run: |
-          pytest firecrawl/__tests__/e2e_withAuth/test.py
+          pytest firecrawl/__tests__/v1/e2e_withAuth/test.py
         working-directory: ./apps/python-sdk

   js-sdk-tests:
@@ -247,11 +248,12 @@ jobs:
     steps:
       - uses: actions/checkout@v3
       - uses: superfly/flyctl-actions/setup-flyctl@master
-      - run: flyctl deploy --remote-only -a firecrawl-scraper-js
+      - run: flyctl deploy --remote-only -a firecrawl-scraper-js --build-secret SENTRY_AUTH_TOKEN=$SENTRY_AUTH_TOKEN
         working-directory: ./apps/api
         env:
           FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
           BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }}
+          SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }}

   build-and-publish-python-sdk:
     name: Build and publish Python SDK

.gitignore (vendored, 2 changed lines)

@@ -19,3 +19,5 @@ apps/test-suite/load-test-results/test-run-report.json
 apps/playwright-service-ts/node_modules/
 apps/playwright-service-ts/package-lock.json

+*.pyc
+.rdb

.gitmodules (vendored, 8 changed lines)

@@ -1,6 +1,6 @@
-[submodule "apps/go-sdk/firecrawl"]
-	path = apps/go-sdk/firecrawl
+[submodule "apps/go-sdk/firecrawl-go"]
+	path = apps/go-sdk/firecrawl-go
 	url = https://github.com/mendableai/firecrawl-go
-[submodule "apps/go-sdk/examples"]
-	path = apps/go-sdk/examples
+[submodule "apps/go-sdk/firecrawl-go-examples"]
+	path = apps/go-sdk/firecrawl-go-examples
 	url = https://github.com/mendableai/firecrawl-go-examples
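Because both submodules move to new paths and section names, existing clones need their submodule configuration refreshed after pulling this change. A sketch of the usual sequence (the cleanup of old directories is an assumption about local state, not part of this commit):

```bash
# Re-read .gitmodules, then fetch the submodules at their new paths.
git submodule sync --recursive
git submodule update --init --recursive

# Optionally remove leftover checkouts at the old paths.
rm -rf apps/go-sdk/firecrawl apps/go-sdk/examples
```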


@@ -44,7 +44,6 @@ BULL_AUTH_KEY= @
 LOGTAIL_KEY= # Use if you're configuring basic logging with logtail
 PLAYWRIGHT_MICROSERVICE_URL= # set if you'd like to run a playwright fallback
 LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse pdfs
-SERPER_API_KEY= #Set if you have a serper key you'd like to use as a search api
 SLACK_WEBHOOK_URL= # set if you'd like to send slack server health status messages
 POSTHOG_API_KEY= # set if you'd like to send posthog events like job logs
 POSTHOG_HOST= # set if you'd like to send posthog events like job logs

README.md (300 changed lines)

@@ -6,7 +6,7 @@ _This repository is in its early development stages. We are still merging custom
 ## What is Firecrawl?

-[Firecrawl](https://firecrawl.dev?ref=github) is an API service that takes a URL, crawls it, and converts it into clean markdown or structured data. We crawl all accessible subpages and give you clean data for each. No sitemap required.
+[Firecrawl](https://firecrawl.dev?ref=github) is an API service that takes a URL, crawls it, and converts it into clean markdown or structured data. We crawl all accessible subpages and give you clean data for each. No sitemap required. Check out our [documentation](https://docs.firecrawl.dev).

 _Pst. hey, you, join our stargazers :)_
@@ -41,18 +41,26 @@ To use the API, you need to sign up on [Firecrawl](https://firecrawl.dev) and ge
 Used to crawl a URL and all accessible subpages. This submits a crawl job and returns a job ID to check the status of the crawl.

 ```bash
-curl -X POST https://api.firecrawl.dev/v0/crawl \
+curl -X POST https://api.firecrawl.dev/v1/crawl \
     -H 'Content-Type: application/json' \
-    -H 'Authorization: Bearer YOUR_API_KEY' \
+    -H 'Authorization: Bearer fc-YOUR_API_KEY' \
     -d '{
-      "url": "https://mendable.ai"
+      "url": "https://docs.firecrawl.dev",
+      "limit": 100,
+      "scrapeOptions": {
+        "formats": ["markdown", "html"]
+      }
     }'
 ```

-Returns a jobId
+Returns a crawl job id and the url to check the status of the crawl.

 ```json
-{ "jobId": "1234-5678-9101" }
+{
+  "success": true,
+  "id": "123-456-789",
+  "url": "https://api.firecrawl.dev/v1/crawl/123-456-789"
+}
 ```

 ### Check Crawl Job
@@ -60,7 +68,7 @@ Returns a jobId
 Used to check the status of a crawl job and get its result.

 ```bash
-curl -X GET https://api.firecrawl.dev/v0/crawl/status/1234-5678-9101 \
+curl -X GET https://api.firecrawl.dev/v1/crawl/123-456-789 \
     -H 'Content-Type: application/json' \
     -H 'Authorization: Bearer YOUR_API_KEY'
 ```
@@ -68,18 +76,20 @@ curl -X GET https://api.firecrawl.dev/v0/crawl/status/1234-5678-9101 \
 ```json
 {
   "status": "completed",
-  "current": 22,
-  "total": 22,
+  "total": 36,
+  "creditsUsed": 36,
+  "expiresAt": "2024-00-00T00:00:00.000Z",
   "data": [
     {
-      "content": "Raw Content ",
-      "markdown": "# Markdown Content",
-      "provider": "web-scraper",
+      "markdown": "[Firecrawl Docs home page![light logo](https://mintlify.s3-us-west-1.amazonaws.com/firecrawl/logo/light.svg)!...",
+      "html": "<!DOCTYPE html><html lang=\"en\" class=\"js-focus-visible lg:[--scroll-mt:9.5rem]\" data-js-focus-visible=\"\">...",
       "metadata": {
-        "title": "Mendable | AI for CX and Sales",
-        "description": "AI for CX and Sales",
-        "language": null,
-        "sourceURL": "https://www.mendable.ai/"
+        "title": "Build a 'Chat with website' using Groq Llama 3 | Firecrawl",
+        "language": "en",
+        "sourceURL": "https://docs.firecrawl.dev/learn/rag-llama3",
+        "description": "Learn how to use Firecrawl, Groq Llama 3, and Langchain to build a 'Chat with your website' bot.",
+        "ogLocaleAlternate": [],
+        "statusCode": 200
       }
     }
   ]
@@ -88,14 +98,15 @@ curl -X GET https://api.firecrawl.dev/v0/crawl/status/1234-5678-9101 \
 ### Scraping

-Used to scrape a URL and get its content.
+Used to scrape a URL and get its content in the specified formats.

 ```bash
-curl -X POST https://api.firecrawl.dev/v0/scrape \
+curl -X POST https://api.firecrawl.dev/v1/scrape \
     -H 'Content-Type: application/json' \
     -H 'Authorization: Bearer YOUR_API_KEY' \
     -d '{
-      "url": "https://mendable.ai"
+      "url": "https://docs.firecrawl.dev",
+      "formats" : ["markdown", "html"]
     }'
 ```
@@ -105,55 +116,83 @@ Response:
 {
   "success": true,
   "data": {
-    "content": "Raw Content ",
-    "markdown": "# Markdown Content",
-    "provider": "web-scraper",
+    "markdown": "Launch Week I is here! [See our Day 2 Release 🚀](https://www.firecrawl.dev/blog/launch-week-i-day-2-doubled-rate-limits)[💥 Get 2 months free...",
+    "html": "<!DOCTYPE html><html lang=\"en\" class=\"light\" style=\"color-scheme: light;\"><body class=\"__variable_36bd41 __variable_d7dc5d font-inter ...",
     "metadata": {
-      "title": "Mendable | AI for CX and Sales",
-      "description": "AI for CX and Sales",
-      "language": null,
-      "sourceURL": "https://www.mendable.ai/"
+      "title": "Home - Firecrawl",
+      "description": "Firecrawl crawls and converts any website into clean markdown.",
+      "language": "en",
+      "keywords": "Firecrawl,Markdown,Data,Mendable,Langchain",
+      "robots": "follow, index",
+      "ogTitle": "Firecrawl",
+      "ogDescription": "Turn any website into LLM-ready data.",
+      "ogUrl": "https://www.firecrawl.dev/",
+      "ogImage": "https://www.firecrawl.dev/og.png?123",
+      "ogLocaleAlternate": [],
+      "ogSiteName": "Firecrawl",
+      "sourceURL": "https://firecrawl.dev",
+      "statusCode": 200
     }
   }
 }
 ```

-### Search (Beta)
+### Map (Alpha)

-Used to search the web, get the most relevant results, scrape each page and return the markdown.
+Used to map a URL and get urls of the website. This returns most links present on the website.

-```bash
-curl -X POST https://api.firecrawl.dev/v0/search \
+```bash cURL
+curl -X POST https://api.firecrawl.dev/v1/map \
     -H 'Content-Type: application/json' \
     -H 'Authorization: Bearer YOUR_API_KEY' \
     -d '{
-      "query": "firecrawl",
-      "pageOptions": {
-        "fetchPageContent": true // false for a fast serp api
-      }
+      "url": "https://firecrawl.dev"
     }'
 ```

+Response:
+
 ```json
 {
-  "success": true,
-  "data": [
-    {
-      "url": "https://mendable.ai",
-      "markdown": "# Markdown Content",
-      "provider": "web-scraper",
-      "metadata": {
-        "title": "Mendable | AI for CX and Sales",
-        "description": "AI for CX and Sales",
-        "language": null,
-        "sourceURL": "https://www.mendable.ai/"
-      }
-    }
+  "status": "success",
+  "links": [
+    "https://firecrawl.dev",
+    "https://www.firecrawl.dev/pricing",
+    "https://www.firecrawl.dev/blog",
+    "https://www.firecrawl.dev/playground",
+    "https://www.firecrawl.dev/smart-crawl",
   ]
 }
 ```

-### Intelligent Extraction (Beta)
+#### Map with search
+
+Map with `search` param allows you to search for specific urls inside a website.
+
+```bash cURL
+curl -X POST https://api.firecrawl.dev/v1/map \
+    -H 'Content-Type: application/json' \
+    -H 'Authorization: Bearer YOUR_API_KEY' \
+    -d '{
+      "url": "https://firecrawl.dev",
+      "search": "docs"
+    }'
+```
+
+Response will be an ordered list from the most relevant to the least relevant.
+
+```json
+{
+  "status": "success",
+  "links": [
+    "https://docs.firecrawl.dev",
+    "https://docs.firecrawl.dev/sdks/python",
+    "https://docs.firecrawl.dev/learn/rag-llama3",
+  ]
+}
+```
+
+### LLM Extraction (v0) (Beta)

 Used to extract structured data from scraped pages.
@@ -220,6 +259,42 @@ curl -X POST https://api.firecrawl.dev/v0/scrape \
 }
 ```

+### Search (v0) (Beta)
+
+Used to search the web, get the most relevant results, scrape each page and return the markdown.
+
+```bash
+curl -X POST https://api.firecrawl.dev/v0/search \
+    -H 'Content-Type: application/json' \
+    -H 'Authorization: Bearer YOUR_API_KEY' \
+    -d '{
+      "query": "firecrawl",
+      "pageOptions": {
+        "fetchPageContent": true // false for a fast serp api
+      }
+    }'
+```
+
+```json
+{
+  "success": true,
+  "data": [
+    {
+      "url": "https://mendable.ai",
+      "markdown": "# Markdown Content",
+      "provider": "web-scraper",
+      "metadata": {
+        "title": "Mendable | AI for CX and Sales",
+        "description": "AI for CX and Sales",
+        "language": null,
+        "sourceURL": "https://www.mendable.ai/"
+      }
+    }
+  ]
+}
+```
+
 ## Using Python SDK

 ### Installing Python SDK
@@ -231,24 +306,28 @@ pip install firecrawl-py
 ### Crawl a website

 ```python
-from firecrawl import FirecrawlApp
+from firecrawl.firecrawl import FirecrawlApp

-app = FirecrawlApp(api_key="YOUR_API_KEY")
+app = FirecrawlApp(api_key="fc-YOUR_API_KEY")

-crawl_result = app.crawl_url('mendable.ai', {'crawlerOptions': {'excludes': ['blog/*']}})
+# Scrape a website:
+scrape_status = app.scrape_url(
+  'https://firecrawl.dev',
+  params={'formats': ['markdown', 'html']}
+)
+print(scrape_status)

-# Get the markdown
-for result in crawl_result:
-    print(result['markdown'])
-```
-
-### Scraping a URL
-
-To scrape a single URL, use the `scrape_url` method. It takes the URL as a parameter and returns the scraped data as a dictionary.
-
-```python
-url = 'https://example.com'
-scraped_data = app.scrape_url(url)
+# Crawl a website:
+crawl_status = app.crawl_url(
+  'https://firecrawl.dev',
+  params={
+    'limit': 100,
+    'scrapeOptions': {'formats': ['markdown', 'html']}
+  },
+  wait_until_done=True,
+  poll_interval=30
+)
+print(crawl_status)
 ```

 ### Extracting structured data from a URL
@@ -256,6 +335,11 @@ scraped_data = app.scrape_url(url)
 With LLM extraction, you can easily extract structured data from any URL. We support pydantic schemas to make it easier for you too. Here is how you to use it:

 ```python
+from firecrawl.firecrawl import FirecrawlApp
+
+app = FirecrawlApp(api_key="fc-YOUR_API_KEY", version="v0")
+
 class ArticleSchema(BaseModel):
     title: str
     points: int
@@ -277,15 +361,6 @@ data = app.scrape_url('https://news.ycombinator.com', {
 print(data["llm_extraction"])
 ```

-### Search for a query
-
-Performs a web search, retrieve the top results, extract data from each page, and returns their markdown.
-
-```python
-query = 'What is Mendable?'
-search_result = app.search(query)
-```
-
 ## Using the Node SDK

 ### Installation
@@ -301,54 +376,33 @@ npm install @mendable/firecrawl-js
 1. Get an API key from [firecrawl.dev](https://firecrawl.dev)
 2. Set the API key as an environment variable named `FIRECRAWL_API_KEY` or pass it as a parameter to the `FirecrawlApp` class.

-### Scraping a URL
-
-To scrape a single URL with error handling, use the `scrapeUrl` method. It takes the URL as a parameter and returns the scraped data as a dictionary.
-
 ```js
-try {
-  const url = "https://example.com";
-  const scrapedData = await app.scrapeUrl(url);
-  console.log(scrapedData);
-} catch (error) {
-  console.error("Error occurred while scraping:", error.message);
+import FirecrawlApp, { CrawlParams, CrawlStatusResponse } from '@mendable/firecrawl-js';
+
+const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY"});
+
+// Scrape a website
+const scrapeResponse = await app.scrapeUrl('https://firecrawl.dev', {
+  formats: ['markdown', 'html'],
+});
+
+if (scrapeResponse) {
+  console.log(scrapeResponse)
+}
+
+// Crawl a website
+const crawlResponse = await app.crawlUrl('https://firecrawl.dev', {
+  limit: 100,
+  scrapeOptions: {
+    formats: ['markdown', 'html'],
+  }
+} as CrawlParams, true, 30) as CrawlStatusResponse;
+
+if (crawlResponse) {
+  console.log(crawlResponse)
 }
 ```

-### Crawling a Website
-
-To crawl a website with error handling, use the `crawlUrl` method. It takes the starting URL and optional parameters as arguments. The `params` argument allows you to specify additional options for the crawl job, such as the maximum number of pages to crawl, allowed domains, and the output format.
-
-```js
-const crawlUrl = "https://example.com";
-const params = {
-  crawlerOptions: {
-    excludes: ["blog/"],
-    includes: [], // leave empty for all pages
-    limit: 1000,
-  },
-  pageOptions: {
-    onlyMainContent: true,
-  },
-};
-const waitUntilDone = true;
-const timeout = 5;
-const crawlResult = await app.crawlUrl(
-  crawlUrl,
-  params,
-  waitUntilDone,
-  timeout
-);
-```
-
-### Checking Crawl Status
-
-To check the status of a crawl job with error handling, use the `checkCrawlStatus` method. It takes the job ID as a parameter and returns the current status of the crawl job.
-
-```js
-const status = await app.checkCrawlStatus(jobId);
-console.log(status);
-```
-
 ### Extracting structured data from a URL
@@ -360,6 +414,7 @@ import { z } from "zod";
 const app = new FirecrawlApp({
   apiKey: "fc-YOUR_API_KEY",
+  version: "v0"
 });

 // Define schema to extract contents into
@@ -384,19 +439,6 @@ const scrapeResult = await app.scrapeUrl("https://news.ycombinator.com", {
 console.log(scrapeResult.data["llm_extraction"]);
 ```

-### Search for a query
-
-With the `search` method, you can search for a query in a search engine and get the top results along with the page content for each result. The method takes the query as a parameter and returns the search results.
-
-```js
-const query = "what is mendable?";
-const searchResults = await app.search(query, {
-  pageOptions: {
-    fetchPageContent: true, // Fetch the page content for each search result
-  },
-});
-```
-
 ## Contributing

 We love contributions! Please read our [contributing guide](CONTRIBUTING.md) before submitting a pull request.


@@ -65,7 +65,6 @@ BULL_AUTH_KEY= @
 LOGTAIL_KEY= # Use if you're configuring basic logging with logtail
 PLAYWRIGHT_MICROSERVICE_URL= # set if you'd like to run a playwright fallback
 LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse pdfs
-SERPER_API_KEY= #Set if you have a serper key you'd like to use as a search api
 SLACK_WEBHOOK_URL= # set if you'd like to send slack server health status messages
 POSTHOG_API_KEY= # set if you'd like to send posthog events like job logs
 POSTHOG_HOST= # set if you'd like to send posthog events like job logs


@@ -32,8 +32,6 @@ BULL_AUTH_KEY=@
 LOGTAIL_KEY=
 # set if you have a llamaparse key you'd like to use to parse pdfs
 LLAMAPARSE_API_KEY=
-# set if you have a serper key you'd like to use as a search api
-SERPER_API_KEY=
 # set if you'd like to send slack server health status messages
 SLACK_WEBHOOK_URL=
 # set if you'd like to send posthog events like job logs

apps/api/.gitignore (vendored, 3 changed lines)

@@ -6,3 +6,6 @@ dump.rdb
 /mongo-data
 /.next/

+.rdb
+.sentryclirc


@@ -12,8 +12,10 @@ RUN --mount=type=cache,id=pnpm,target=/pnpm/store pnpm install --prod --frozen-l
 FROM base AS build
 RUN --mount=type=cache,id=pnpm,target=/pnpm/store pnpm install --frozen-lockfile
+RUN apt-get update -qq && apt-get install -y ca-certificates && update-ca-certificates

 RUN pnpm install
-RUN pnpm run build
+RUN --mount=type=secret,id=SENTRY_AUTH_TOKEN \
+    bash -c 'export SENTRY_AUTH_TOKEN="$(cat /run/secrets/SENTRY_AUTH_TOKEN)"; if [ -z $SENTRY_AUTH_TOKEN ]; then pnpm run build:nosentry; else pnpm run build; fi'

 # Install packages needed for deployment
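The new build step mounts SENTRY_AUTH_TOKEN as a BuildKit secret, so it is available during `pnpm run build` without being written into an image layer, and the build falls back to `build:nosentry` when the secret is absent. A sketch of building the image locally under that scheme (the token file name and image tag are assumptions, not from this diff):

```bash
# Build with the Sentry token exposed only to the RUN step that needs it.
echo -n "sntrys_placeholder_token" > .sentry_token
DOCKER_BUILDKIT=1 docker build \
  --secret id=SENTRY_AUTH_TOKEN,src=.sentry_token \
  -t firecrawl-api ./apps/api

# Without --secret, the conditional in the Dockerfile runs `pnpm run build:nosentry`.
DOCKER_BUILDKIT=1 docker build -t firecrawl-api ./apps/api
```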


@@ -24,8 +24,8 @@ kill_timeout = '30s'
   [http_service.concurrency]
     type = "requests"
-    hard_limit = 100
-    soft_limit = 50
+    # hard_limit = 100
+    soft_limit = 100

 [[http_service.checks]]
   grace_period = "10s"
@@ -51,12 +51,13 @@ kill_timeout = '30s'
   [services.concurrency]
     type = 'connections'
-    hard_limit = 25
-    soft_limit = 20
+    # hard_limit = 25
+    soft_limit = 100

 [[vm]]
-  size = 'performance-1x'
+  size = 'performance-2x'
   processes = ['app','worker']
+  memory = 8192


@@ -24,8 +24,8 @@ kill_timeout = '30s'
   [http_service.concurrency]
     type = "requests"
-    hard_limit = 200
-    soft_limit = 75
+    # hard_limit = 200
+    soft_limit = 200

 [[http_service.checks]]
   grace_period = "20s"
@@ -50,8 +50,8 @@ kill_timeout = '30s'
   [services.concurrency]
     type = 'connections'
-    hard_limit = 30
-    soft_limit = 12
+    # hard_limit = 30
+    soft_limit = 200

 [[vm]]
   size = 'performance-4x'

apps/api/openapi-v0.json (new file, 924 lines)

@@ -0,0 +1,924 @@
{
"openapi": "3.0.0",
"info": {
"title": "Firecrawl API",
"version": "0.0.0",
"description": "API for interacting with Firecrawl services to perform web scraping and crawling tasks.",
"contact": {
"name": "Firecrawl Support",
"url": "https://firecrawl.dev/support",
"email": "support@firecrawl.dev"
}
},
"servers": [
{
"url": "https://api.firecrawl.dev/v0"
}
],
"paths": {
"/scrape": {
"post": {
"summary": "Scrape a single URL and optionally extract information using an LLM",
"operationId": "scrapeAndExtractFromUrl",
"tags": ["Scraping"],
"security": [
{
"bearerAuth": []
}
],
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"url": {
"type": "string",
"format": "uri",
"description": "The URL to scrape"
},
"pageOptions": {
"type": "object",
"properties": {
"headers": {
"type": "object",
"description": "Headers to send with the request. Can be used to send cookies, user-agent, etc."
},
"includeHtml": {
"type": "boolean",
"description": "Include the HTML version of the content on page. Will output a html key in the response.",
"default": false
},
"includeRawHtml": {
"type": "boolean",
"description": "Include the raw HTML content of the page. Will output a rawHtml key in the response.",
"default": false
},
"onlyIncludeTags": {
"type": "array",
"items": {
"type": "string"
},
"description": "Only include tags, classes and ids from the page in the final output. Use comma separated values. Example: 'script, .ad, #footer'"
},
"onlyMainContent": {
"type": "boolean",
"description": "Only return the main content of the page excluding headers, navs, footers, etc.",
"default": false
},
"removeTags": {
"type": "array",
"items": {
"type": "string"
},
"description": "Tags, classes and ids to remove from the page. Use comma separated values. Example: 'script, .ad, #footer'"
},
"replaceAllPathsWithAbsolutePaths": {
"type": "boolean",
"description": "Replace all relative paths with absolute paths for images and links",
"default": false
},
"screenshot": {
"type": "boolean",
"description": "Include a screenshot of the top of the page that you are scraping.",
"default": false
},
"fullPageScreenshot": {
"type": "boolean",
"description": "Include a full page screenshot of the page that you are scraping.",
"default": false
},
"waitFor": {
"type": "integer",
"description": "Wait x amount of milliseconds for the page to load to fetch content",
"default": 0
}
}
},
"extractorOptions": {
"type": "object",
"description": "Options for extraction of structured information from the page content. Note: LLM-based extraction is not performed by default and only occurs when explicitly configured. The 'markdown' mode simply returns the scraped markdown and is the default mode for scraping.",
"default": {},
"properties": {
"mode": {
"type": "string",
"enum": ["markdown", "llm-extraction", "llm-extraction-from-raw-html", "llm-extraction-from-markdown"],
"description": "The extraction mode to use. 'markdown': Returns the scraped markdown content, does not perform LLM extraction. 'llm-extraction': Extracts information from the cleaned and parsed content using LLM. 'llm-extraction-from-raw-html': Extracts information directly from the raw HTML using LLM. 'llm-extraction-from-markdown': Extracts information from the markdown content using LLM."
},
"extractionPrompt": {
"type": "string",
"description": "A prompt describing what information to extract from the page, applicable for LLM extraction modes."
},
"extractionSchema": {
"type": "object",
"additionalProperties": true,
"description": "The schema for the data to be extracted, required only for LLM extraction modes.",
"required": [
"company_mission",
"supports_sso",
"is_open_source"
]
}
}
},
"timeout": {
"type": "integer",
"description": "Timeout in milliseconds for the request",
"default": 30000
}
},
"required": ["url"]
}
}
}
},
"responses": {
"200": {
"description": "Successful response",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ScrapeResponse"
}
}
}
},
"402": {
"description": "Payment required",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"error": {
"type": "string",
"example": "Payment required to access this resource."
}
}
}
}
}
},
"429": {
"description": "Too many requests",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"error": {
"type": "string",
"example": "Request rate limit exceeded. Please wait and try again later."
}
}
}
}
}
},
"500": {
"description": "Server error",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"error": {
"type": "string",
"example": "An unexpected error occurred on the server."
}
}
}
}
}
}
}
}
},
"/crawl": {
"post": {
"summary": "Crawl multiple URLs based on options",
"operationId": "crawlUrls",
"tags": ["Crawling"],
"security": [
{
"bearerAuth": []
}
],
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"url": {
"type": "string",
"format": "uri",
"description": "The base URL to start crawling from"
},
"crawlerOptions": {
"type": "object",
"properties": {
"includes": {
"type": "array",
"items": {
"type": "string"
},
"description": "URL patterns to include"
},
"excludes": {
"type": "array",
"items": {
"type": "string"
},
"description": "URL patterns to exclude"
},
"generateImgAltText": {
"type": "boolean",
"description": "Generate alt text for images using LLMs (must have a paid plan)",
"default": false
},
"returnOnlyUrls": {
"type": "boolean",
"description": "If true, returns only the URLs as a list on the crawl status. Attention: the return response will be a list of URLs inside the data, not a list of documents.",
"default": false
},
"maxDepth": {
"type": "integer",
"description": "Maximum depth to crawl relative to the entered URL. A maxDepth of 0 scrapes only the entered URL. A maxDepth of 1 scrapes the entered URL and all pages one level deep. A maxDepth of 2 scrapes the entered URL and all pages up to two levels deep. Higher values follow the same pattern."
},
"mode": {
"type": "string",
"enum": ["default", "fast"],
"description": "The crawling mode to use. Fast mode crawls 4x faster websites without sitemap, but may not be as accurate and shouldn't be used in heavy js-rendered websites.",
"default": "default"
},
"ignoreSitemap": {
"type": "boolean",
"description": "Ignore the website sitemap when crawling",
"default": false
},
"limit": {
"type": "integer",
"description": "Maximum number of pages to crawl",
"default": 10000
},
"allowBackwardCrawling": {
"type": "boolean",
"description": "Enables the crawler to navigate from a specific URL to previously linked pages. For instance, from 'example.com/product/123' back to 'example.com/product'",
"default": false
},
"allowExternalContentLinks": {
"type": "boolean",
"description": "Allows the crawler to follow links to external websites.",
"default": false
}
}
},
"pageOptions": {
"type": "object",
"properties": {
"headers": {
"type": "object",
"description": "Headers to send with the request. Can be used to send cookies, user-agent, etc."
},
"includeHtml": {
"type": "boolean",
"description": "Include the HTML version of the content on page. Will output a html key in the response.",
"default": false
},
"includeRawHtml": {
"type": "boolean",
"description": "Include the raw HTML content of the page. Will output a rawHtml key in the response.",
"default": false
},
"onlyIncludeTags": {
"type": "array",
"items": {
"type": "string"
},
"description": "Only include tags, classes and ids from the page in the final output. Use comma separated values. Example: 'script, .ad, #footer'"
},
"onlyMainContent": {
"type": "boolean",
"description": "Only return the main content of the page excluding headers, navs, footers, etc.",
"default": false
},
"removeTags": {
"type": "array",
"items": {
"type": "string"
},
"description": "Tags, classes and ids to remove from the page. Use comma separated values. Example: 'script, .ad, #footer'"
},
"replaceAllPathsWithAbsolutePaths": {
"type": "boolean",
"description": "Replace all relative paths with absolute paths for images and links",
"default": false
},
"screenshot": {
"type": "boolean",
"description": "Include a screenshot of the top of the page that you are scraping.",
"default": false
},
"fullPageScreenshot": {
"type": "boolean",
"description": "Include a full page screenshot of the page that you are scraping.",
"default": false
},
"waitFor": {
"type": "integer",
"description": "Wait x amount of milliseconds for the page to load to fetch content",
"default": 0
}
}
}
},
"required": ["url"]
}
}
}
},
"responses": {
"200": {
"description": "Successful response",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CrawlResponse"
}
}
}
},
"402": {
"description": "Payment required",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"error": {
"type": "string",
"example": "Payment required to access this resource."
}
}
}
}
}
},
"429": {
"description": "Too many requests",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"error": {
"type": "string",
"example": "Request rate limit exceeded. Please wait and try again later."
}
}
}
}
}
},
"500": {
"description": "Server error",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"error": {
"type": "string",
"example": "An unexpected error occurred on the server."
}
}
}
}
}
}
}
}
},
"/search": {
"post": {
"summary": "Search for a keyword in Google, returns top page results with markdown content for each page",
"operationId": "searchGoogle",
"tags": ["Search"],
"security": [
{
"bearerAuth": []
}
],
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"query": {
"type": "string",
"format": "uri",
"description": "The query to search for"
},
"pageOptions": {
"type": "object",
"properties": {
"onlyMainContent": {
"type": "boolean",
"description": "Only return the main content of the page excluding headers, navs, footers, etc.",
"default": false
},
"fetchPageContent": {
"type": "boolean",
"description": "Fetch the content of each page. If false, defaults to a basic fast serp API.",
"default": true
},
"includeHtml": {
"type": "boolean",
"description": "Include the HTML version of the content on page. Will output a html key in the response.",
"default": false
},
"includeRawHtml": {
"type": "boolean",
"description": "Include the raw HTML content of the page. Will output a rawHtml key in the response.",
"default": false
}
}
},
"searchOptions": {
"type": "object",
"properties": {
"limit": {
"type": "integer",
"description": "Maximum number of results. Max is 20 during beta."
}
}
}
},
"required": ["query"]
}
}
}
},
"responses": {
"200": {
"description": "Successful response",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/SearchResponse"
}
}
}
},
"402": {
"description": "Payment required",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"error": {
"type": "string",
"example": "Payment required to access this resource."
}
}
}
}
}
},
"429": {
"description": "Too many requests",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"error": {
"type": "string",
"example": "Request rate limit exceeded. Please wait and try again later."
}
}
}
}
}
},
"500": {
"description": "Server error",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"error": {
"type": "string",
"example": "An unexpected error occurred on the server."
}
}
}
}
}
}
}
}
},
"/crawl/status/{jobId}": {
"get": {
"tags": ["Crawl"],
"summary": "Get the status of a crawl job",
"operationId": "getCrawlStatus",
"security": [
{
"bearerAuth": []
}
],
"parameters": [
{
"name": "jobId",
"in": "path",
"description": "ID of the crawl job",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"description": "Successful response",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"status": {
"type": "string",
"description": "Status of the job (completed, active, failed, paused)"
},
"current": {
"type": "integer",
"description": "Current page number"
},
"total": {
"type": "integer",
"description": "Total number of pages"
},
"data": {
"type": "array",
"items": {
"$ref": "#/components/schemas/CrawlStatusResponseObj"
},
"description": "Data returned from the job (null when it is in progress)"
},
"partial_data": {
"type": "array",
"items": {
"$ref": "#/components/schemas/CrawlStatusResponseObj"
},
"description": "Partial documents returned as it is being crawled (streaming). **This feature is currently in alpha - expect breaking changes** When a page is ready, it will append to the partial_data array, so there is no need to wait for the entire website to be crawled. When the crawl is done, partial_data will become empty and the result will be available in `data`. There is a max of 50 items in the array response. The oldest item (top of the array) will be removed when the new item is added to the array."
}
}
}
}
}
},
"402": {
"description": "Payment required",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"error": {
"type": "string",
"example": "Payment required to access this resource."
}
}
}
}
}
},
"429": {
"description": "Too many requests",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"error": {
"type": "string",
"example": "Request rate limit exceeded. Please wait and try again later."
}
}
}
}
}
},
"500": {
"description": "Server error",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"error": {
"type": "string",
"example": "An unexpected error occurred on the server."
}
}
}
}
}
}
}
}
},
"/crawl/cancel/{jobId}": {
"delete": {
"tags": ["Crawl"],
"summary": "Cancel a crawl job",
"operationId": "cancelCrawlJob",
"security": [
{
"bearerAuth": []
}
],
"parameters": [
{
"name": "jobId",
"in": "path",
"description": "ID of the crawl job",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"description": "Successful response",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"status": {
"type": "string",
"description": "Returns cancelled."
}
}
}
}
}
},
"402": {
"description": "Payment required",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"error": {
"type": "string",
"example": "Payment required to access this resource."
}
}
}
}
}
},
"429": {
"description": "Too many requests",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"error": {
"type": "string",
"example": "Request rate limit exceeded. Please wait and try again later."
}
}
}
}
}
},
"500": {
"description": "Server error",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"error": {
"type": "string",
"example": "An unexpected error occurred on the server."
}
}
}
}
}
}
}
}
}
},
"components": {
"securitySchemes": {
"bearerAuth": {
"type": "http",
"scheme": "bearer"
}
},
"schemas": {
"ScrapeResponse": {
"type": "object",
"properties": {
"success": {
"type": "boolean"
},
"data": {
"type": "object",
"properties": {
"markdown": {
"type": "string"
},
"content": {
"type": "string"
},
"html": {
"type": "string",
"nullable": true,
"description": "HTML version of the content on page if `includeHtml` is true"
},
"rawHtml": {
"type": "string",
"nullable": true,
"description": "Raw HTML content of the page if `includeRawHtml` is true"
},
"metadata": {
"type": "object",
"properties": {
"title": {
"type": "string"
},
"description": {
"type": "string"
},
"language": {
"type": "string",
"nullable": true
},
"sourceURL": {
"type": "string",
"format": "uri"
},
"<any other metadata> ": {
"type": "string"
},
"pageStatusCode": {
"type": "integer",
"description": "The status code of the page"
},
"pageError": {
"type": "string",
"nullable": true,
"description": "The error message of the page"
}
}
},
"llm_extraction": {
"type": "object",
"description": "Displayed when using LLM Extraction. Extracted data from the page following the schema defined.",
"nullable": true
},
"warning": {
"type": "string",
"nullable": true,
"description": "Can be displayed when using LLM Extraction. Warning message will let you know any issues with the extraction."
}
}
}
}
},
"CrawlStatusResponseObj": {
"type": "object",
"properties": {
"markdown": {
"type": "string"
},
"content": {
"type": "string"
},
"html": {
"type": "string",
"nullable": true,
"description": "HTML version of the content on page if `includeHtml` is true"
},
"rawHtml": {
"type": "string",
"nullable": true,
"description": "Raw HTML content of the page if `includeRawHtml` is true"
},
"index": {
"type": "integer",
"description": "The number of the page that was crawled. This is useful for `partial_data` so you know which page the data is from."
},
"metadata": {
"type": "object",
"properties": {
"title": {
"type": "string"
},
"description": {
"type": "string"
},
"language": {
"type": "string",
"nullable": true
},
"sourceURL": {
"type": "string",
"format": "uri"
},
"<any other metadata> ": {
"type": "string"
},
"pageStatusCode": {
"type": "integer",
"description": "The status code of the page"
},
"pageError": {
"type": "string",
"nullable": true,
"description": "The error message of the page"
}
}
}
}
},
"SearchResponse": {
"type": "object",
"properties": {
"success": {
"type": "boolean"
},
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"url": {
"type": "string"
},
"markdown": {
"type": "string"
},
"content": {
"type": "string"
},
"metadata": {
"type": "object",
"properties": {
"title": {
"type": "string"
},
"description": {
"type": "string"
},
"language": {
"type": "string",
"nullable": true
},
"sourceURL": {
"type": "string",
"format": "uri"
}
}
}
}
}
}
}
},
"CrawlResponse": {
"type": "object",
"properties": {
"jobId": {
"type": "string"
}
}
}
}
},
"security": [
{
"bearerAuth": []
}
]
}
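The v0 spec above also documents cancelling a crawl job, which the updated README no longer shows. A hedged curl sketch against the documented /v0/crawl/cancel/{jobId} route (the job id and API key are placeholders):

```bash
curl -X DELETE https://api.firecrawl.dev/v0/crawl/cancel/1234-5678-9101 \
    -H 'Authorization: Bearer YOUR_API_KEY'

# Per the schema above, the response reports the job as cancelled:
# { "status": "cancelled" }
```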


@@ -18,8 +18,8 @@
   "paths": {
     "/scrape": {
       "post": {
-        "summary": "Scrape a single URL and optionally extract information using an LLM",
-        "operationId": "scrapeAndExtractFromUrl",
+        "summary": "Scrape a single URL",
+        "operationId": "scrape",
         "tags": ["Scraping"],
         "security": [
           {
@@ -38,94 +38,47 @@
            "format": "uri",
            "description": "The URL to scrape"
          },
-         "pageOptions": {
-           "type": "object",
-           "properties": {
-             "headers": {
-               "type": "object",
-               "description": "Headers to send with the request. Can be used to send cookies, user-agent, etc."
-             },
-             "includeHtml": {
-               "type": "boolean",
-               "description": "Include the HTML version of the content on page. Will output a html key in the response.",
-               "default": false
-             },
-             "includeRawHtml": {
-               "type": "boolean",
-               "description": "Include the raw HTML content of the page. Will output a rawHtml key in the response.",
-               "default": false
-             },
-             "onlyIncludeTags": {
-               "type": "array",
-               "items": {
-                 "type": "string"
-               },
-               "description": "Only include tags, classes and ids from the page in the final output. Use comma separated values. Example: 'script, .ad, #footer'"
-             },
-             "onlyMainContent": {
-               "type": "boolean",
-               "description": "Only return the main content of the page excluding headers, navs, footers, etc.",
-               "default": false
-             },
-             "removeTags": {
-               "type": "array",
-               "items": {
-                 "type": "string"
-               },
-               "description": "Tags, classes and ids to remove from the page. Use comma separated values. Example: 'script, .ad, #footer'"
-             },
-             "replaceAllPathsWithAbsolutePaths": {
-               "type": "boolean",
-               "description": "Replace all relative paths with absolute paths for images and links",
-               "default": false
-             },
-             "screenshot": {
-               "type": "boolean",
-               "description": "Include a screenshot of the top of the page that you are scraping.",
-               "default": false
-             },
-             "fullPageScreenshot": {
-               "type": "boolean",
-               "description": "Include a full page screenshot of the page that you are scraping.",
-               "default": false
-             },
-             "waitFor": {
-               "type": "integer",
-               "description": "Wait x amount of milliseconds for the page to load to fetch content",
-               "default": 0
-             }
-           }
-         },
-         "extractorOptions": {
-           "type": "object",
-           "description": "Options for extraction of structured information from the page content. Note: LLM-based extraction is not performed by default and only occurs when explicitly configured. The 'markdown' mode simply returns the scraped markdown and is the default mode for scraping.",
-           "default": {},
-           "properties": {
-             "mode": {
-               "type": "string",
-               "enum": ["markdown", "llm-extraction", "llm-extraction-from-raw-html", "llm-extraction-from-markdown"],
-               "description": "The extraction mode to use. 'markdown': Returns the scraped markdown content, does not perform LLM extraction. 'llm-extraction': Extracts information from the cleaned and parsed content using LLM. 'llm-extraction-from-raw-html': Extracts information directly from the raw HTML using LLM. 'llm-extraction-from-markdown': Extracts information from the markdown content using LLM."
-             },
-             "extractionPrompt": {
-               "type": "string",
-               "description": "A prompt describing what information to extract from the page, applicable for LLM extraction modes."
-             },
-             "extractionSchema": {
-               "type": "object",
-               "additionalProperties": true,
-               "description": "The schema for the data to be extracted, required only for LLM extraction modes.",
-               "required": [
-                 "company_mission",
-                 "supports_sso",
-                 "is_open_source"
-               ]
-             }
-           }
-         },
+         "formats": {
+           "type": "array",
+           "items": {
+             "type": "string",
+             "enum": ["markdown", "html", "rawHtml", "links", "screenshot", "screenshot@fullPage"]
+           },
+           "description": "Specific formats to return.\n\n - markdown: The page in Markdown format.\n - html: The page's HTML, trimmed to include only meaningful content.\n - rawHtml: The page's original HTML.\n - links: The links on the page.\n - screenshot: A screenshot of the top of the page.\n - screenshot@fullPage: A screenshot of the full page. (overridden by screenshot if present)",
+           "default": ["markdown"]
+         },
+         "headers": {
+           "type": "object",
+           "description": "Headers to send with the request. Can be used to send cookies, user-agent, etc."
+         },
+         "includeTags": {
+           "type": "array",
+           "items": {
+             "type": "string"
+           },
+           "description": "Only include tags, classes and ids from the page in the final output. Use comma separated values. Example: 'script, .ad, #footer'"
+         },
+         "excludeTags": {
+           "type": "array",
+           "items": {
+             "type": "string"
+           },
+           "description": "Tags, classes and ids to remove from the page. Use comma separated values. Example: 'script, .ad, #footer'"
+         },
+         "onlyMainContent": {
+           "type": "boolean",
+           "description": "Only return the main content of the page excluding headers, navs, footers, etc.",
+           "default": true
+         },
          "timeout": {
            "type": "integer",
            "description": "Timeout in milliseconds for the request",
            "default": 30000
+         },
+         "waitFor": {
+           "type": "integer",
+           "description": "Wait x amount of milliseconds for the page to load to fetch content",
+           "default": 0
          }
        },
        "required": ["url"]
@@ -741,24 +694,42 @@
        "success": {
          "type": "boolean"
        },
+       "warning": {
+         "type": "string",
+         "nullable": true,
+         "description": "Warning message to let you know of any issues."
+       },
        "data": {
          "type": "object",
          "properties": {
            "markdown": {
-             "type": "string"
-           },
-           "content": {
-             "type": "string"
+             "type": "string",
+             "nullable": true,
+             "description": "Markdown content of the page if the `markdown` format was specified (default)"
            },
            "html": {
              "type": "string",
              "nullable": true,
-             "description": "HTML version of the content on page if `includeHtml` is true"
+             "description": "HTML version of the content on page if the `html` format was specified"
            },
            "rawHtml": {
              "type": "string",
              "nullable": true,
-             "description": "Raw HTML content of the page if `includeRawHtml` is true"
+             "description": "Raw HTML content of the page if the `rawHtml` format was specified"
+           },
+           "links": {
+             "type": "array",
+             "items": {
+               "type": "string",
+               "format": "uri"
+             },
+             "nullable": true,
+             "description": "Links on the page if the `links` format was specified"
+           },
+           "screenshot": {
+             "type": "string",
+             "nullable": true,
+             "description": "URL of the screenshot of the page if the `screenshot` or `screenshot@fullSize` format was specified"
            },
            "metadata": {
              "type": "object",
@@ -780,27 +751,16 @@
              "<any other metadata> ": {
                "type": "string"
              },
-             "pageStatusCode": {
+             "statusCode": {
                "type": "integer",
                "description": "The status code of the page"
              },
-             "pageError": {
+             "error": {
                "type": "string",
                "nullable": true,
                "description": "The error message of the page"
              }
            }
-         },
-         "llm_extraction": {
-           "type": "object",
-           "description": "Displayed when using LLM Extraction. Extracted data from the page following the schema defined.",
-           "nullable": true
-         },
-         "warning": {
-           "type": "string",
-           "nullable": true,
-           "description": "Can be displayed when using LLM Extraction. Warning message will let you know any issues with the extraction."
          }
        }
      }
@@ -810,24 +770,33 @@
      "type": "object",
      "properties": {
        "markdown": {
-         "type": "string"
-       },
-       "content": {
-         "type": "string"
+         "type": "string",
+         "nullable": true,
+         "description": "Markdown content of the page if the `markdown` format was specified (default)"
        },
        "html": {
          "type": "string",
          "nullable": true,
-         "description": "HTML version of the content on page if `includeHtml` is true"
+         "description": "HTML version of the content on page if the `html` format was specified"
        },
        "rawHtml": {
          "type": "string",
          "nullable": true,
-         "description": "Raw HTML content of the page if `includeRawHtml` is true"
+         "description": "Raw HTML content of the page if the `rawHtml` format was specified"
        },
-       "index": {
-         "type": "integer",
-         "description": "The number of the page that was crawled. This is useful for `partial_data` so you know which page the data is from."
+       "links": {
+         "type": "array",
+         "items": {
+           "type": "string",
+           "format": "uri"
+         },
+         "nullable": true,
+         "description": "Links on the page if the `links` format was specified"
+       },
+       "screenshot": {
+         "type": "string",
+         "nullable": true,
+         "description": "URL of the screenshot of the page if the `screenshot` or `screenshot@fullSize` format was specified"
        },
        "metadata": {
          "type": "object",
@@ -849,11 +818,11 @@
          "<any other metadata> ": {
            "type": "string"
          },
-         "pageStatusCode": {
+         "statusCode": {
            "type": "integer",
            "description": "The status code of the page"
          },
-         "pageError": {
+         "error": {
            "type": "string",
            "nullable": true,
            "description": "The error message of the page"
@@ -871,34 +840,63 @@
      "data": {
        "type": "array",
        "items": {
-         "type": "object",
-         "properties": {
-           "url": {
-             "type": "string"
-           },
-           "markdown": {
-             "type": "string"
-           },
-           "content": {
-             "type": "string"
-           },
-           "metadata": {
-             "type": "object",
-             "properties": {
-               "title": {
-                 "type": "string"
-               },
-               "description": {
-                 "type": "string"
-               },
-               "language": {
-                 "type": "string",
-                 "nullable": true
-               },
-               "sourceURL": {
-                 "type": "string",
-                 "format": "uri"
-               }
+         "markdown": {
+           "type": "string",
+           "nullable": true,
+           "description": "Markdown content of the page if the `markdown` format was specified (default)"
+         },
+         "html": {
+           "type": "string",
+           "nullable": true,
+           "description": "HTML version of the content on page if the `html` format was specified"
+         },
+         "rawHtml": {
+           "type": "string",
+           "nullable": true,
+           "description": "Raw HTML content of the page if the `rawHtml` format was specified"
+         },
+         "links": {
+           "type": "array",
+           "items": {
+             "type": "string",
+             "format": "uri"
+           },
+           "nullable": true,
+           "description": "Links on the page if the `links` format was specified"
+         },
+         "screenshot": {
+           "type": "string",
+           "nullable": true,
+           "description": "URL of the screenshot of the page if the `screenshot` or `screenshot@fullSize` format was specified"
+         },
+         "metadata": {
+           "type": "object",
+           "properties": {
+             "title": {
+               "type": "string"
+             },
+             "description": {
+               "type": "string"
+             },
+             "language": {
+               "type": "string",
+               "nullable": true
+             },
+             "sourceURL": {
+               "type": "string",
+               "format": "uri"
+             },
+             "<any other metadata> ": {
+               "type": "string"
+             },
+             "statusCode": {
+               "type": "integer",
+               "description": "The status code of the page"
+             },
+             "error": {
+               "type": "string",
+               "nullable": true,
+               "description": "The error message of the page"
              }
            }
          }
@@ -909,8 +907,15 @@
    "CrawlResponse": {
      "type": "object",
      "properties": {
-       "jobId": {
+       "success": {
+         "type": "boolean"
+       },
+       "id": {
          "type": "string"
+       },
+       "url": {
+         "type": "string",
+         "format": "uri"
        }
      }
    }
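Putting the reworked v1 request schema together, a scrape call that exercises the new top-level fields would look roughly like this (the URL and values are illustrative, not taken from this diff):

```bash
curl -X POST https://api.firecrawl.dev/v1/scrape \
    -H 'Content-Type: application/json' \
    -H 'Authorization: Bearer fc-YOUR_API_KEY' \
    -d '{
      "url": "https://docs.firecrawl.dev",
      "formats": ["markdown", "links"],
      "onlyMainContent": true,
      "excludeTags": ["script", ".ad", "#footer"],
      "waitFor": 1000,
      "timeout": 30000
    }'
```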


@@ -9,7 +9,8 @@
     "format": "prettier --write \"src/**/*.(js|ts)\"",
     "flyio": "node dist/src/index.js",
     "start:dev": "nodemon --exec ts-node src/index.ts",
-    "build": "tsc",
+    "build": "tsc && pnpm sentry:sourcemaps",
+    "build:nosentry": "tsc",
     "test": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_noAuth/*'",
     "test:local-no-auth": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_withAuth/*'",
     "test:full": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='(src/__tests__/e2e_noAuth|src/__tests__/e2e_withAuth)'",
@@ -19,8 +20,9 @@
     "mongo-docker": "docker run -d -p 2717:27017 -v ./mongo-data:/data/db --name mongodb mongo:latest",
     "mongo-docker-console": "docker exec -it mongodb mongosh",
     "run-example": "npx ts-node src/example.ts",
-    "deploy:fly": "flyctl deploy",
-    "deploy:fly:staging": "fly deploy -c fly.staging.toml"
+    "deploy:fly": "flyctl deploy --build-secret SENTRY_AUTH_TOKEN=$(dotenv -p SENTRY_AUTH_TOKEN)",
+    "deploy:fly:staging": "fly deploy -c fly.staging.toml",
+    "sentry:sourcemaps": "sentry-cli sourcemaps inject --org caleb-peffer --project firecrawl-scraper-js ./dist && sentry-cli sourcemaps upload --org caleb-peffer --project firecrawl-scraper-js ./dist"
   },
   "author": "",
   "license": "ISC",
@@ -29,7 +31,6 @@
     "@jest/globals": "^29.7.0",
     "@tsconfig/recommended": "^1.0.3",
     "@types/body-parser": "^1.19.2",
-    "@types/bull": "^4.10.0",
     "@types/cors": "^2.8.13",
     "@types/express": "^4.17.17",
     "@types/jest": "^29.5.12",
@@ -53,17 +54,21 @@
     "@bull-board/express": "^5.20.5",
     "@devil7softwares/pos": "^1.0.2",
     "@dqbd/tiktoken": "^1.0.13",
-    "@hyperdx/node-opentelemetry": "^0.8.0",
+    "@hyperdx/node-opentelemetry": "^0.8.1",
     "@logtail/node": "^0.4.12",
     "@nangohq/node": "^0.40.8",
-    "@sentry/node": "^8.13.0",
+    "@sentry/cli": "^2.33.1",
+    "@sentry/node": "^8.26.0",
+    "@sentry/profiling-node": "^8.26.0",
     "@supabase/supabase-js": "^2.44.2",
+    "@types/express-ws": "^3.0.4",
+    "@types/ws": "^8.5.12",
     "ajv": "^8.16.0",
     "async": "^3.2.5",
     "async-mutex": "^0.5.0",
     "axios": "^1.3.4",
     "bottleneck": "^2.19.5",
-    "bull": "^4.15.0",
+    "bullmq": "^5.11.0",
     "cacheable-lookup": "^6.1.0",
     "cheerio": "^1.0.0-rc.12",
     "cohere": "^1.1.1",
@@ -71,7 +76,9 @@
     "cron-parser": "^4.9.0",
     "date-fns": "^3.6.0",
     "dotenv": "^16.3.1",
+    "dotenv-cli": "^7.4.2",
     "express-rate-limit": "^7.3.1",
+    "express-ws": "^5.0.2",
     "form-data": "^4.0.0",
     "glob": "^10.4.2",
     "gpt3-tokenizer": "^1.1.5",
@@ -99,14 +106,16 @@
     "robots-parser": "^3.0.1",
     "scrapingbee": "^1.7.4",
     "stripe": "^16.1.0",
+    "systeminformation": "^5.22.11",
     "turndown": "^7.1.3",
     "turndown-plugin-gfm": "^1.0.2",
     "typesense": "^1.5.4",
     "unstructured-client": "^0.11.3",
     "uuid": "^10.0.0",
     "wordpos": "^2.1.0",
+    "ws": "^8.18.0",
     "xml2js": "^0.6.2",
-    "zod": "^3.23.4",
+    "zod": "^3.23.8",
     "zod-to-json-schema": "^3.23.1"
   },
   "nodemonConfig": {
@@ -116,4 +125,4 @@
     "temp"
   ]
 }
-}
+}
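The new scripts tie the Sentry release flow into the build: `build` now runs `sentry:sourcemaps` after `tsc`, and `deploy:fly` reads SENTRY_AUTH_TOKEN from the local `.env` through dotenv-cli and forwards it as a Fly build secret. A sketch of the local flow these scripts assume (the `.env` value is a placeholder):

```bash
cd apps/api

# deploy:fly resolves the token with `dotenv -p SENTRY_AUTH_TOKEN`,
# so the variable has to be present in apps/api/.env.
echo 'SENTRY_AUTH_TOKEN=sntrys_placeholder_token' >> .env

pnpm install
pnpm run build        # tsc, then sentry-cli sourcemaps inject/upload
pnpm run deploy:fly   # flyctl deploy --build-secret SENTRY_AUTH_TOKEN=...
```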

(File diff suppressed because it is too large.)


@ -1,12 +1,16 @@
### Crawl Website ### Crawl Website
POST http://localhost:3002/v0/scrape HTTP/1.1 POST http://localhost:3002/v0/scrape HTTP/1.1
Authorization: Bearer fc Authorization: Bearer fc-4e6259caf03b42a4b6c9261e0f96e673
content-type: application/json content-type: application/json
{ {
"url":"firecrawl.dev" "url":"corterix.com"
} }
### Check Job Status
GET http://localhost:3002/v1/crawl/1dd0f924-a36f-4b96-94ea-32ed954dac67 HTTP/1.1
Authorization: Bearer fc-4e6259caf03b42a4b6c9261e0f96e673
### Check Job Status ### Check Job Status
GET http://localhost:3002/v0/jobs/active HTTP/1.1 GET http://localhost:3002/v0/jobs/active HTTP/1.1
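The new v1 status request added above maps to roughly the following call from Node 18+ (built-in fetch); the job id and API key are placeholders, and the response fields are those asserted by the v1 e2e tests later in this diff:

async function checkCrawlStatus(jobId: string, apiKey: string) {
  const res = await fetch(`http://localhost:3002/v1/crawl/${jobId}`, {
    headers: { Authorization: `Bearer ${apiKey}` },
  });
  return res.json(); // e.g. { status: "scraping" | "completed", data: [...] }
}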

View File

@ -404,7 +404,7 @@ describe("E2E Tests for API Routes", () => {
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json") .set("Content-Type", "application/json")
.set("x-idempotency-key", uniqueIdempotencyKey) .set("x-idempotency-key", uniqueIdempotencyKey)
.send({ url: 'https://mendable.ai' }); .send({ url: 'https://docs.firecrawl.dev' });
expect(firstResponse.statusCode).toBe(200); expect(firstResponse.statusCode).toBe(200);
@ -414,7 +414,7 @@ describe("E2E Tests for API Routes", () => {
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json") .set("Content-Type", "application/json")
.set("x-idempotency-key", uniqueIdempotencyKey) .set("x-idempotency-key", uniqueIdempotencyKey)
.send({ url: 'https://mendable.ai' }); .send({ url: 'https://docs.firecrawl.dev' });
expect(secondResponse.statusCode).toBe(409); expect(secondResponse.statusCode).toBe(409);
expect(secondResponse.body.error).toBe('Idempotency key already used'); expect(secondResponse.body.error).toBe('Idempotency key already used');
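The updated test above exercises the x-idempotency-key contract: a first request with a fresh key succeeds, and a replay with the same key is rejected with 409 "Idempotency key already used". A minimal client-side sketch, assuming only what the test asserts (the endpoint path and base URL here are placeholders):

import { randomUUID } from "node:crypto";

async function submitCrawlOnce(url: string, apiKey: string) {
  const idempotencyKey = randomUUID();
  const send = () =>
    fetch("http://localhost:3002/v0/crawl", {      // placeholder endpoint
      method: "POST",
      headers: {
        Authorization: `Bearer ${apiKey}`,
        "Content-Type": "application/json",
        "x-idempotency-key": idempotencyKey,
      },
      body: JSON.stringify({ url }),
    });

  const first = await send();   // expected 200
  const replay = await send();  // expected 409, body.error === "Idempotency key already used"
  return { first: first.status, replay: replay.status };
}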

View File

@ -0,0 +1,951 @@
import request from "supertest";
import dotenv from "dotenv";
import {
ScrapeRequest,
ScrapeResponseRequestTest,
} from "../../controllers/v1/types";
dotenv.config();
const TEST_URL = "http://127.0.0.1:3002";
describe("E2E Tests for v1 API Routes", () => {
beforeAll(() => {
process.env.USE_DB_AUTHENTICATION = "true";
});
afterAll(() => {
delete process.env.USE_DB_AUTHENTICATION;
});
describe("GET /is-production", () => {
it.concurrent("should return the production status", async () => {
const response: ScrapeResponseRequestTest = await request(TEST_URL).get(
"/is-production"
);
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("isProduction");
});
});
describe("POST /v1/scrape", () => {
it.concurrent("should require authorization", async () => {
const response: ScrapeResponseRequestTest = await request(TEST_URL).post(
"/v1/scrape"
);
expect(response.statusCode).toBe(401);
});
it.concurrent("should throw error for blocklisted URL", async () => {
const scrapeRequest: ScrapeRequest = {
url: "https://facebook.com/fake-test",
};
const response = await request(TEST_URL)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send(scrapeRequest);
expect(response.statusCode).toBe(403);
expect(response.body.error).toBe("URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions.");
});
it.concurrent(
"should return an error response with an invalid API key",
async () => {
const response: ScrapeResponseRequestTest = await request(TEST_URL)
.post("/v1/scrape")
.set("Authorization", `Bearer invalid-api-key`)
.set("Content-Type", "application/json")
.send({ url: "https://firecrawl.dev" });
expect(response.statusCode).toBe(401);
}
);
it.concurrent(
"should return a successful response with a valid API key",
async () => {
const scrapeRequest: ScrapeRequest = {
url: "https://roastmywebsite.ai",
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send(scrapeRequest);
expect(response.statusCode).toBe(200);
if (!("data" in response.body)) {
throw new Error("Expected response body to have 'data' property");
}
expect(response.body.data).not.toHaveProperty("content");
expect(response.body.data).toHaveProperty("markdown");
expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data).not.toHaveProperty("html");
expect(response.body.data.markdown).toContain("_Roast_");
expect(response.body.data.metadata.error).toBeUndefined();
expect(response.body.data.metadata.title).toBe("Roast My Website");
expect(response.body.data.metadata.description).toBe(
"Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️"
);
expect(response.body.data.metadata.keywords).toBe(
"Roast My Website,Roast,Website,GitHub,Firecrawl"
);
expect(response.body.data.metadata.robots).toBe("follow, index");
expect(response.body.data.metadata.ogTitle).toBe("Roast My Website");
expect(response.body.data.metadata.ogDescription).toBe(
"Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️"
);
expect(response.body.data.metadata.ogUrl).toBe(
"https://www.roastmywebsite.ai"
);
expect(response.body.data.metadata.ogImage).toBe(
"https://www.roastmywebsite.ai/og.png"
);
expect(response.body.data.metadata.ogLocaleAlternate).toStrictEqual([]);
expect(response.body.data.metadata.ogSiteName).toBe("Roast My Website");
expect(response.body.data.metadata.sourceURL).toBe(
"https://roastmywebsite.ai"
);
expect(response.body.data.metadata.statusCode).toBe(200);
},
30000
); // 30 seconds timeout
it.concurrent(
"should return a successful response with a valid API key and includeHtml set to true",
async () => {
const scrapeRequest: ScrapeRequest = {
url: "https://roastmywebsite.ai",
formats: ["markdown", "html"],
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send(scrapeRequest);
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("data");
if (!("data" in response.body)) {
throw new Error("Expected response body to have 'data' property");
}
expect(response.body.data).toHaveProperty("markdown");
expect(response.body.data).toHaveProperty("html");
expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data.markdown).toContain("_Roast_");
expect(response.body.data.html).toContain("<h1");
expect(response.body.data.metadata.statusCode).toBe(200);
expect(response.body.data.metadata.error).toBeUndefined();
},
30000
);
it.concurrent('should return a successful response for a valid scrape with PDF file', async () => {
const scrapeRequest: ScrapeRequest = {
url: "https://arxiv.org/pdf/astro-ph/9301001.pdf"
// formats: ["markdown", "html"],
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
.post('/v1/scrape')
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
.set('Content-Type', 'application/json')
.send(scrapeRequest);
await new Promise((r) => setTimeout(r, 6000));
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty('data');
if (!("data" in response.body)) {
throw new Error("Expected response body to have 'data' property");
}
expect(response.body.data).toHaveProperty('metadata');
expect(response.body.data.markdown).toContain('Broad Line Radio Galaxy');
expect(response.body.data.metadata.statusCode).toBe(200);
expect(response.body.data.metadata.error).toBeUndefined();
}, 60000);
it.concurrent('should return a successful response for a valid scrape with PDF file without explicit .pdf extension', async () => {
const scrapeRequest: ScrapeRequest = {
url: "https://arxiv.org/pdf/astro-ph/9301001"
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
.post('/v1/scrape')
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
.set('Content-Type', 'application/json')
.send(scrapeRequest);
await new Promise((r) => setTimeout(r, 6000));
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty('data');
if (!("data" in response.body)) {
throw new Error("Expected response body to have 'data' property");
}
expect(response.body.data).toHaveProperty('markdown');
expect(response.body.data).toHaveProperty('metadata');
expect(response.body.data.markdown).toContain('Broad Line Radio Galaxy');
expect(response.body.data.metadata.statusCode).toBe(200);
expect(response.body.data.metadata.error).toBeUndefined();
}, 60000);
it.concurrent("should return a successful response with a valid API key with removeTags option", async () => {
const scrapeRequest: ScrapeRequest = {
url: "https://www.scrapethissite.com/",
onlyMainContent: false // default is true
};
const responseWithoutRemoveTags: ScrapeResponseRequestTest = await request(TEST_URL)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send(scrapeRequest);
expect(responseWithoutRemoveTags.statusCode).toBe(200);
expect(responseWithoutRemoveTags.body).toHaveProperty("data");
if (!("data" in responseWithoutRemoveTags.body)) {
throw new Error("Expected response body to have 'data' property");
}
expect(responseWithoutRemoveTags.body.data).toHaveProperty("markdown");
expect(responseWithoutRemoveTags.body.data).toHaveProperty("metadata");
expect(responseWithoutRemoveTags.body.data).not.toHaveProperty("html");
expect(responseWithoutRemoveTags.body.data.markdown).toContain("[FAQ](/faq/)"); // .nav
expect(responseWithoutRemoveTags.body.data.markdown).toContain("Hartley Brody 2023"); // #footer
const scrapeRequestWithRemoveTags: ScrapeRequest = {
url: "https://www.scrapethissite.com/",
excludeTags: ['.nav', '#footer', 'strong'],
onlyMainContent: false // default is true
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send(scrapeRequestWithRemoveTags);
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("data");
if (!("data" in response.body)) {
throw new Error("Expected response body to have 'data' property");
}
expect(response.body.data).toHaveProperty("markdown");
expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data).not.toHaveProperty("html");
expect(response.body.data.markdown).not.toContain("Hartley Brody 2023");
expect(response.body.data.markdown).not.toContain("[FAQ](/faq/)"); // .nav
}, 30000);
it.concurrent('should return a successful response for a scrape with 400 page', async () => {
const response: ScrapeResponseRequestTest = await request(TEST_URL)
.post('/v1/scrape')
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
.set('Content-Type', 'application/json')
.send({ url: 'https://httpstat.us/400' });
await new Promise((r) => setTimeout(r, 5000));
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty('data');
if (!("data" in response.body)) {
throw new Error("Expected response body to have 'data' property");
}
expect(response.body.data).toHaveProperty('markdown');
expect(response.body.data).toHaveProperty('metadata');
expect(response.body.data.metadata.statusCode).toBe(400);
}, 60000);
it.concurrent('should return a successful response for a scrape with 401 page', async () => {
const response: ScrapeResponseRequestTest = await request(TEST_URL)
.post('/v1/scrape')
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
.set('Content-Type', 'application/json')
.send({ url: 'https://httpstat.us/401' });
await new Promise((r) => setTimeout(r, 5000));
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty('data');
if (!("data" in response.body)) {
throw new Error("Expected response body to have 'data' property");
}
expect(response.body.data).toHaveProperty('markdown');
expect(response.body.data).toHaveProperty('metadata');
expect(response.body.data.metadata.statusCode).toBe(401);
}, 60000);
it.concurrent('should return a successful response for a scrape with 403 page', async () => {
const response: ScrapeResponseRequestTest = await request(TEST_URL)
.post('/v1/scrape')
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
.set('Content-Type', 'application/json')
.send({ url: 'https://httpstat.us/403' });
await new Promise((r) => setTimeout(r, 5000));
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty('data');
if (!("data" in response.body)) {
throw new Error("Expected response body to have 'data' property");
}
expect(response.body.data).toHaveProperty('markdown');
expect(response.body.data).toHaveProperty('metadata');
expect(response.body.data.metadata.statusCode).toBe(403);
}, 60000);
it.concurrent('should return a successful response for a scrape with 404 page', async () => {
const response: ScrapeResponseRequestTest = await request(TEST_URL)
.post('/v1/scrape')
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
.set('Content-Type', 'application/json')
.send({ url: 'https://httpstat.us/404' });
await new Promise((r) => setTimeout(r, 5000));
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty('data');
if (!("data" in response.body)) {
throw new Error("Expected response body to have 'data' property");
}
expect(response.body.data).toHaveProperty('markdown');
expect(response.body.data).toHaveProperty('metadata');
expect(response.body.data.metadata.statusCode).toBe(404);
}, 60000);
it.concurrent('should return a successful response for a scrape with 405 page', async () => {
const response: ScrapeResponseRequestTest = await request(TEST_URL)
.post('/v1/scrape')
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
.set('Content-Type', 'application/json')
.send({ url: 'https://httpstat.us/405' });
await new Promise((r) => setTimeout(r, 5000));
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty('data');
if (!("data" in response.body)) {
throw new Error("Expected response body to have 'data' property");
}
expect(response.body.data).toHaveProperty('markdown');
expect(response.body.data).toHaveProperty('metadata');
expect(response.body.data.metadata.statusCode).toBe(405);
}, 60000);
it.concurrent('should return a successful response for a scrape with 500 page', async () => {
const response: ScrapeResponseRequestTest = await request(TEST_URL)
.post('/v1/scrape')
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
.set('Content-Type', 'application/json')
.send({ url: 'https://httpstat.us/500' });
await new Promise((r) => setTimeout(r, 5000));
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty('data');
if (!("data" in response.body)) {
throw new Error("Expected response body to have 'data' property");
}
expect(response.body.data).toHaveProperty('markdown');
expect(response.body.data).toHaveProperty('metadata');
expect(response.body.data.metadata.statusCode).toBe(500);
}, 60000);
it.concurrent("should return a timeout error when scraping takes longer than the specified timeout", async () => {
const response: ScrapeResponseRequestTest = await request(TEST_URL)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send({ url: "https://firecrawl.dev", timeout: 1000 });
expect(response.statusCode).toBe(408);
}, 3000);
it.concurrent(
"should return a successful response with a valid API key and includeHtml set to true",
async () => {
const scrapeRequest: ScrapeRequest = {
url: "https://roastmywebsite.ai",
formats: ["html","rawHtml"],
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send(scrapeRequest);
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("data");
if (!("data" in response.body)) {
throw new Error("Expected response body to have 'data' property");
}
expect(response.body.data).not.toHaveProperty("markdown");
expect(response.body.data).toHaveProperty("html");
expect(response.body.data).toHaveProperty("rawHtml");
expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data.html).toContain("<h1");
expect(response.body.data.rawHtml).toContain("<html");
expect(response.body.data.metadata.statusCode).toBe(200);
expect(response.body.data.metadata.error).toBeUndefined();
},
30000
);
it.concurrent(
"should return a successful response with waitFor",
async () => {
const scrapeRequest: ScrapeRequest = {
url: "https://ycombinator.com/companies",
formats: ["markdown"],
waitFor: 5000
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send(scrapeRequest);
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("data");
if (!("data" in response.body)) {
throw new Error("Expected response body to have 'data' property");
}
expect(response.body.data).toHaveProperty("markdown");
expect(response.body.data).not.toHaveProperty("html");
expect(response.body.data).not.toHaveProperty("links");
expect(response.body.data).not.toHaveProperty("rawHtml");
expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data.markdown).toContain("PagerDuty");
expect(response.body.data.metadata.statusCode).toBe(200);
expect(response.body.data.metadata.error).toBeUndefined();
},
30000
);
it.concurrent(
"should return a successful response with a valid links on page",
async () => {
const scrapeRequest: ScrapeRequest = {
url: "https://roastmywebsite.ai",
formats: ["links"],
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send(scrapeRequest);
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("data");
if (!("data" in response.body)) {
throw new Error("Expected response body to have 'data' property");
}
expect(response.body.data).not.toHaveProperty("html");
expect(response.body.data).not.toHaveProperty("rawHtml");
expect(response.body.data).toHaveProperty("links");
expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data.links).toContain("https://firecrawl.dev");
expect(response.body.data.metadata.statusCode).toBe(200);
expect(response.body.data.metadata.error).toBeUndefined();
},
30000
);
});
describe("POST /v1/map", () => {
it.concurrent("should require authorization", async () => {
const response: ScrapeResponseRequestTest = await request(TEST_URL).post(
"/v1/map"
);
expect(response.statusCode).toBe(401);
});
it.concurrent("should return an error response with an invalid API key", async () => {
const response: ScrapeResponseRequestTest = await request(TEST_URL)
.post("/v1/map")
.set("Authorization", `Bearer invalid-api-key`)
.set("Content-Type", "application/json")
.send({ url: "https://firecrawl.dev" });
expect(response.statusCode).toBe(401);
});
it.concurrent("should return a successful response with a valid API key", async () => {
const mapRequest = {
url: "https://roastmywebsite.ai"
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
.post("/v1/map")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send(mapRequest);
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("success", true);
expect(response.body).toHaveProperty("links");
if (!("links" in response.body)) {
throw new Error("Expected response body to have 'links' property");
}
const links = response.body.links as unknown[];
expect(Array.isArray(links)).toBe(true);
expect(links.length).toBeGreaterThan(0);
});
it.concurrent("should return a successful response with a valid API key and search", async () => {
const mapRequest = {
url: "https://usemotion.com",
search: "pricing"
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
.post("/v1/map")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send(mapRequest);
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("success", true);
expect(response.body).toHaveProperty("links");
if (!("links" in response.body)) {
throw new Error("Expected response body to have 'links' property");
}
const links = response.body.links as unknown[];
expect(Array.isArray(links)).toBe(true);
expect(links.length).toBeGreaterThan(0);
expect(links[0]).toContain("usemotion.com/pricing");
});
it.concurrent("should return a successful response with a valid API key and search and allowSubdomains", async () => {
const mapRequest = {
url: "https://firecrawl.dev",
search: "docs",
includeSubdomains: true
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
.post("/v1/map")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send(mapRequest);
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("success", true);
expect(response.body).toHaveProperty("links");
if (!("links" in response.body)) {
throw new Error("Expected response body to have 'links' property");
}
const links = response.body.links as unknown[];
expect(Array.isArray(links)).toBe(true);
expect(links.length).toBeGreaterThan(0);
expect(links[0]).toContain("docs.firecrawl.dev");
});
it.concurrent("should return a successful response with a valid API key and search and allowSubdomains and www", async () => {
const mapRequest = {
url: "https://www.firecrawl.dev",
search: "docs",
includeSubdomains: true
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
.post("/v1/map")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send(mapRequest);
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("success", true);
expect(response.body).toHaveProperty("links");
if (!("links" in response.body)) {
throw new Error("Expected response body to have 'links' property");
}
const links = response.body.links as unknown[];
expect(Array.isArray(links)).toBe(true);
expect(links.length).toBeGreaterThan(0);
expect(links[0]).toContain("docs.firecrawl.dev");
}, 10000)
it.concurrent("should return a successful response with a valid API key and search and not allowSubdomains and www", async () => {
const mapRequest = {
url: "https://www.firecrawl.dev",
search: "docs",
includeSubdomains: false
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
.post("/v1/map")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send(mapRequest);
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("success", true);
expect(response.body).toHaveProperty("links");
if (!("links" in response.body)) {
throw new Error("Expected response body to have 'links' property");
}
const links = response.body.links as unknown[];
expect(Array.isArray(links)).toBe(true);
expect(links.length).toBeGreaterThan(0);
expect(links[0]).not.toContain("docs.firecrawl.dev");
})
it.concurrent("should return an error for invalid URL", async () => {
const mapRequest = {
url: "invalid-url",
includeSubdomains: true,
search: "test",
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
.post("/v1/map")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send(mapRequest);
expect(response.statusCode).toBe(400);
expect(response.body).toHaveProperty("success", false);
expect(response.body).toHaveProperty("error");
});
});
describe("POST /v1/crawl", () => {
it.concurrent("should require authorization", async () => {
const response: ScrapeResponseRequestTest = await request(TEST_URL).post(
"/v1/crawl"
);
expect(response.statusCode).toBe(401);
});
it.concurrent("should throw error for blocklisted URL", async () => {
const scrapeRequest: ScrapeRequest = {
url: "https://facebook.com/fake-test",
};
const response = await request(TEST_URL)
.post("/v1/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send(scrapeRequest);
expect(response.statusCode).toBe(403);
expect(response.body.error).toBe("URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions.");
});
it.concurrent(
"should return an error response with an invalid API key",
async () => {
const response: ScrapeResponseRequestTest = await request(TEST_URL)
.post("/v1/crawl")
.set("Authorization", `Bearer invalid-api-key`)
.set("Content-Type", "application/json")
.send({ url: "https://firecrawl.dev" });
expect(response.statusCode).toBe(401);
}
);
it.concurrent("should return a successful response", async () => {
const response = await request(TEST_URL)
.post("/v1/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send({ url: "https://firecrawl.dev" });
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("id");
expect(response.body.id).toMatch(
/^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
);
expect(response.body).toHaveProperty("success", true);
expect(response.body).toHaveProperty("url");
expect(response.body.url).toContain("/v1/crawl/");
});
it.concurrent(
"should return a successful response with a valid API key and valid includes option",
async () => {
const crawlResponse = await request(TEST_URL)
.post("/v1/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send({
url: "https://firecrawl.dev",
limit: 10,
includePaths: ["blog/*"],
});
let response;
let isFinished = false;
while (!isFinished) {
response = await request(TEST_URL)
.get(`/v1/crawl/${crawlResponse.body.id}`)
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("status");
isFinished = response.body.status === "completed";
if (!isFinished) {
await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
}
}
await new Promise((resolve) => setTimeout(resolve, 1000)); // wait for data to be saved in the database
const completedResponse = await request(TEST_URL)
.get(`/v1/crawl/${crawlResponse.body.id}`)
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
const urls = completedResponse.body.data.map(
(item: any) => item.metadata?.sourceURL
);
expect(urls.length).toBeGreaterThan(5);
urls.forEach((url: string) => {
expect(url).toContain("firecrawl.dev/blog");
});
expect(completedResponse.statusCode).toBe(200);
expect(completedResponse.body).toHaveProperty("status");
expect(completedResponse.body.status).toBe("completed");
expect(completedResponse.body).toHaveProperty("data");
expect(completedResponse.body.data[0]).toHaveProperty("markdown");
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
expect(completedResponse.body.data[0]).not.toHaveProperty("content"); // v0
expect(completedResponse.body.data[0].metadata.statusCode).toBe(200);
expect(completedResponse.body.data[0].metadata.error).toBeUndefined();
},
180000
); // 180 seconds
it.concurrent(
"should return a successful response with a valid API key and valid excludes option",
async () => {
const crawlResponse = await request(TEST_URL)
.post("/v1/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send({
url: "https://firecrawl.dev",
limit: 10,
excludePaths: ["blog/*"],
});
let isFinished = false;
let response;
while (!isFinished) {
response = await request(TEST_URL)
.get(`/v1/crawl/${crawlResponse.body.id}`)
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("status");
isFinished = response.body.status === "completed";
if (!isFinished) {
await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
}
}
await new Promise((resolve) => setTimeout(resolve, 1000)); // wait for data to be saved in the database
const completedResponse = await request(
TEST_URL
)
.get(`/v1/crawl/${crawlResponse.body.id}`)
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
const urls = completedResponse.body.data.map(
(item: any) => item.metadata?.sourceURL
);
expect(urls.length).toBeGreaterThan(3);
urls.forEach((url: string) => {
expect(url.startsWith("https://www.firecrawl.dev/blog/")).toBeFalsy();
});
},
90000
); // 90 seconds
it.concurrent(
"should return a successful response with max depth option for a valid crawl job",
async () => {
const crawlResponse = await request(TEST_URL)
.post("/v1/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send({
url: "https://www.scrapethissite.com",
maxDepth: 1,
});
expect(crawlResponse.statusCode).toBe(200);
const response = await request(TEST_URL)
.get(`/v1/crawl/${crawlResponse.body.id}`)
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("status");
expect(["active", "waiting", "completed", "scraping"]).toContain(response.body.status);
// poll every second until the crawl reports "completed"
let isCompleted = false;
while (!isCompleted) {
const statusCheckResponse = await request(TEST_URL)
.get(`/v1/crawl/${crawlResponse.body.id}`)
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
expect(statusCheckResponse.statusCode).toBe(200);
isCompleted = statusCheckResponse.body.status === "completed";
if (!isCompleted) {
await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
}
}
const completedResponse = await request(
TEST_URL
)
.get(`/v1/crawl/${crawlResponse.body.id}`)
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
expect(completedResponse.statusCode).toBe(200);
expect(completedResponse.body).toHaveProperty("status");
expect(completedResponse.body.status).toBe("completed");
expect(completedResponse.body).toHaveProperty("data");
expect(completedResponse.body.data[0]).not.toHaveProperty("content");
expect(completedResponse.body.data[0]).toHaveProperty("markdown");
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
expect(completedResponse.body.data[0].metadata.statusCode).toBe(200);
expect(completedResponse.body.data[0].metadata.error).toBeUndefined();
const urls = completedResponse.body.data.map(
(item: any) => item.metadata?.sourceURL
);
expect(urls.length).toBeGreaterThanOrEqual(1);
// Check if all URLs have a maximum depth of 1
urls.forEach((url: string) => {
const pathSplits = new URL(url).pathname.split("/");
const depth =
pathSplits.length -
(pathSplits[0].length === 0 &&
pathSplits[pathSplits.length - 1].length === 0
? 1
: 0);
expect(depth).toBeLessThanOrEqual(2);
});
},
180000
);
})
describe("GET /v1/crawl/:jobId", () => {
it.concurrent("should require authorization", async () => {
const response = await request(TEST_URL).get("/v1/crawl/123");
expect(response.statusCode).toBe(401);
});
it.concurrent(
"should return an error response with an invalid API key",
async () => {
const response = await request(TEST_URL)
.get("/v1/crawl/123")
.set("Authorization", `Bearer invalid-api-key`);
expect(response.statusCode).toBe(401);
}
);
it.concurrent(
"should return Job not found for invalid job ID",
async () => {
const response = await request(TEST_URL)
.get("/v1/crawl/invalidJobId")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
expect(response.statusCode).toBe(404);
}
);
it.concurrent(
"should return a successful crawl status response for a valid crawl job",
async () => {
const crawlResponse = await request(TEST_URL)
.post("/v1/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send({ url: "https://docs.mendable.ai" });
expect(crawlResponse.statusCode).toBe(200);
let isCompleted = false;
while (!isCompleted) {
const response = await request(TEST_URL)
.get(`/v1/crawl/${crawlResponse.body.id}`)
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("status");
if (response.body.status === "completed") {
isCompleted = true;
} else {
await new Promise((r) => setTimeout(r, 1000)); // Wait for 1 second before checking again
}
}
await new Promise((resolve) => setTimeout(resolve, 1000)); // wait for data to be saved in the database
const completedResponse = await request(TEST_URL)
.get(`/v1/crawl/${crawlResponse.body.id}`)
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
expect(completedResponse.body).toHaveProperty("status");
expect(completedResponse.body.status).toBe("completed");
expect(completedResponse.body).toHaveProperty("data");
expect(completedResponse.body.data[0]).not.toHaveProperty("content");
expect(completedResponse.body.data[0]).toHaveProperty("markdown");
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
expect(completedResponse.body.data[0].metadata.statusCode).toBe(
200
);
expect(
completedResponse.body.data[0].metadata.error
).toBeUndefined();
const childrenLinks = completedResponse.body.data.filter(
(doc) =>
doc.metadata &&
doc.metadata.sourceURL
);
expect(childrenLinks.length).toBe(completedResponse.body.data.length);
},
180000
); // 180 seconds
it.concurrent(
"If someone cancels a crawl job, it should turn into failed status",
async () => {
const crawlResponse = await request(TEST_URL)
.post("/v1/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send({ url: "https://docs.tatum.io", limit: 200 });
expect(crawlResponse.statusCode).toBe(200);
await new Promise((r) => setTimeout(r, 10000));
const responseCancel = await request(TEST_URL)
.delete(`/v1/crawl/${crawlResponse.body.id}`)
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
expect(responseCancel.statusCode).toBe(200);
expect(responseCancel.body).toHaveProperty("status");
expect(responseCancel.body.status).toBe("cancelled");
await new Promise((r) => setTimeout(r, 10000));
const completedResponse = await request(TEST_URL)
.get(`/v1/crawl/${crawlResponse.body.id}`)
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
expect(completedResponse.statusCode).toBe(200);
expect(completedResponse.body).toHaveProperty("status");
expect(completedResponse.body.status).toBe("cancelled");
expect(completedResponse.body).toHaveProperty("data");
expect(completedResponse.body.data[0]).toHaveProperty("markdown");
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
expect(completedResponse.body.data[0].metadata.statusCode).toBe(200);
expect(completedResponse.body.data[0].metadata.error).toBeUndefined();
},
60000
); // 60 seconds
})
});
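The crawl tests above repeat one pattern: POST /v1/crawl, then poll GET /v1/crawl/:id once per second until status becomes "completed". A small helper capturing that loop (base URL, timeout, and interval mirror the tests but are otherwise placeholders):

async function waitForCrawl(id: string, apiKey: string, timeoutMs = 180_000) {
  const deadline = Date.now() + timeoutMs;
  while (Date.now() < deadline) {
    const res = await fetch(`http://127.0.0.1:3002/v1/crawl/${id}`, {
      headers: { Authorization: `Bearer ${apiKey}` },
    });
    const body = await res.json();
    if (body.status === "completed") return body;  // body.data holds the scraped documents
    await new Promise((r) => setTimeout(r, 1000)); // same 1 s backoff as the tests
  }
  throw new Error(`Crawl ${id} did not complete within ${timeoutMs}ms`);
}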

File diff suppressed because it is too large

View File

@ -1,4 +1,4 @@
import { crawlController } from '../crawl' import { crawlController } from '../v0/crawl'
import { Request, Response } from 'express'; import { Request, Response } from 'express';
import { authenticateUser } from '../auth'; // Ensure this import is correct import { authenticateUser } from '../auth'; // Ensure this import is correct
import { createIdempotencyKey } from '../../services/idempotency/create'; import { createIdempotencyKey } from '../../services/idempotency/create';

View File

@ -1,87 +0,0 @@
import { Request, Response } from "express";
import { Job } from "bull";
import { Logger } from "../../lib/logger";
import { getWebScraperQueue } from "../../services/queue-service";
import { checkAlerts } from "../../services/alerts";
export async function cleanBefore24hCompleteJobsController(
req: Request,
res: Response
) {
Logger.info("🐂 Cleaning jobs older than 24h");
try {
const webScraperQueue = getWebScraperQueue();
const batchSize = 10;
const numberOfBatches = 9; // Adjust based on your needs
const completedJobsPromises: Promise<Job[]>[] = [];
for (let i = 0; i < numberOfBatches; i++) {
completedJobsPromises.push(
webScraperQueue.getJobs(
["completed"],
i * batchSize,
i * batchSize + batchSize,
true
)
);
}
const completedJobs: Job[] = (
await Promise.all(completedJobsPromises)
).flat();
const before24hJobs =
completedJobs.filter(
(job) => job.finishedOn < Date.now() - 24 * 60 * 60 * 1000
) || [];
let count = 0;
if (!before24hJobs) {
return res.status(200).send(`No jobs to remove.`);
}
for (const job of before24hJobs) {
try {
await job.remove();
count++;
} catch (jobError) {
Logger.error(`🐂 Failed to remove job with ID ${job.id}: ${jobError}`);
}
}
return res.status(200).send(`Removed ${count} completed jobs.`);
} catch (error) {
Logger.error(`🐂 Failed to clean last 24h complete jobs: ${error}`);
return res.status(500).send("Failed to clean jobs");
}
}
export async function checkQueuesController(req: Request, res: Response) {
try {
await checkAlerts();
return res.status(200).send("Alerts initialized");
} catch (error) {
Logger.debug(`Failed to initialize alerts: ${error}`);
return res.status(500).send("Failed to initialize alerts");
}
}
// Use this as a "health check" that way we dont destroy the server
export async function queuesController(req: Request, res: Response) {
try {
const webScraperQueue = getWebScraperQueue();
const [webScraperActive] = await Promise.all([
webScraperQueue.getActiveCount(),
]);
const noActiveJobs = webScraperActive === 0;
// 200 if no active jobs, 503 if there are active jobs
return res.status(noActiveJobs ? 200 : 500).json({
webScraperActive,
noActiveJobs,
});
} catch (error) {
Logger.error(error);
return res.status(500).json({ error: error.message });
}
}

View File

@ -1,21 +1,36 @@
import { parseApi } from "../../src/lib/parseApi"; import { parseApi } from "../lib/parseApi";
import { getRateLimiter } from "../../src/services/rate-limiter"; import { getRateLimiter } from "../services/rate-limiter";
import { import {
AuthResponse, AuthResponse,
NotificationType, NotificationType,
PlanType,
RateLimiterMode, RateLimiterMode,
} from "../../src/types"; } from "../types";
import { supabase_service } from "../../src/services/supabase"; import { supabase_service } from "../services/supabase";
import { withAuth } from "../../src/lib/withAuth"; import { withAuth } from "../lib/withAuth";
import { RateLimiterRedis } from "rate-limiter-flexible"; import { RateLimiterRedis } from "rate-limiter-flexible";
import { setTraceAttributes } from "@hyperdx/node-opentelemetry"; import { setTraceAttributes } from "@hyperdx/node-opentelemetry";
import { sendNotification } from "../services/notification/email_notification"; import { sendNotification } from "../services/notification/email_notification";
import { Logger } from "../lib/logger"; import { Logger } from "../lib/logger";
import { redlock } from "../../src/services/redlock"; import { redlock } from "../services/redlock";
import { getValue } from "../../src/services/redis"; import { getValue } from "../services/redis";
import { setValue } from "../../src/services/redis"; import { setValue } from "../services/redis";
import { validate } from "uuid"; import { validate } from "uuid";
import * as Sentry from "@sentry/node";
// const { data, error } = await supabase_service
// .from('api_keys')
// .select(`
// key,
// team_id,
// teams (
// subscriptions (
// price_id
// )
// )
// `)
// .eq('key', normalizedApi)
// .limit(1)
// .single();
function normalizedApiIsUuid(potentialUuid: string): boolean { function normalizedApiIsUuid(potentialUuid: string): boolean {
// Check if the string is a valid UUID // Check if the string is a valid UUID
return validate(potentialUuid); return validate(potentialUuid);
@ -34,6 +49,7 @@ function setTrace(team_id: string, api_key: string) {
api_key, api_key,
}); });
} catch (error) { } catch (error) {
Sentry.captureException(error);
Logger.error(`Error setting trace attributes: ${error.message}`); Logger.error(`Error setting trace attributes: ${error.message}`);
} }
} }
@ -49,6 +65,7 @@ async function getKeyAndPriceId(normalizedApi: string): Promise<{
api_key: normalizedApi, api_key: normalizedApi,
}); });
if (error) { if (error) {
Sentry.captureException(error);
Logger.error(`RPC ERROR (get_key_and_price_id_2): ${error.message}`); Logger.error(`RPC ERROR (get_key_and_price_id_2): ${error.message}`);
return { return {
success: false, success: false,
@ -58,7 +75,10 @@ async function getKeyAndPriceId(normalizedApi: string): Promise<{
}; };
} }
if (!data || data.length === 0) { if (!data || data.length === 0) {
Logger.warn(`Error fetching api key: ${error.message} or data is empty`); if (error) {
Logger.warn(`Error fetching api key: ${error.message} or data is empty`);
Sentry.captureException(error);
}
// TODO: change this error code ? // TODO: change this error code ?
return { return {
success: false, success: false,
@ -82,7 +102,7 @@ export async function supaAuthenticateUser(
team_id?: string; team_id?: string;
error?: string; error?: string;
status?: number; status?: number;
plan?: string; plan?: PlanType;
}> { }> {
const authHeader = req.headers.authorization; const authHeader = req.headers.authorization;
if (!authHeader) { if (!authHeader) {
@ -112,7 +132,11 @@ export async function supaAuthenticateUser(
let priceId: string | null = null; let priceId: string | null = null;
if (token == "this_is_just_a_preview_token") { if (token == "this_is_just_a_preview_token") {
rateLimiter = getRateLimiter(RateLimiterMode.Preview, token); if (mode == RateLimiterMode.CrawlStatus) {
rateLimiter = getRateLimiter(RateLimiterMode.CrawlStatus, token);
} else {
rateLimiter = getRateLimiter(RateLimiterMode.Preview, token);
}
teamId = "preview"; teamId = "preview";
} else { } else {
normalizedApi = parseApi(token); normalizedApi = parseApi(token);
@ -148,11 +172,12 @@ export async function supaAuthenticateUser(
await setValue( await setValue(
cacheKey, cacheKey,
JSON.stringify({ team_id: teamId, price_id: priceId }), JSON.stringify({ team_id: teamId, price_id: priceId }),
10 60
); );
} }
} catch (error) { } catch (error) {
Logger.error(`Error with auth function: ${error.message}`); Sentry.captureException(error);
Logger.error(`Error with auth function: ${error}`);
// const { // const {
// success, // success,
// teamId: tId, // teamId: tId,
@ -215,7 +240,8 @@ export async function supaAuthenticateUser(
rateLimiter = getRateLimiter( rateLimiter = getRateLimiter(
RateLimiterMode.Scrape, RateLimiterMode.Scrape,
token, token,
subscriptionData.plan subscriptionData.plan,
teamId
); );
break; break;
case RateLimiterMode.Search: case RateLimiterMode.Search:
@ -225,6 +251,13 @@ export async function supaAuthenticateUser(
subscriptionData.plan subscriptionData.plan
); );
break; break;
case RateLimiterMode.Map:
rateLimiter = getRateLimiter(
RateLimiterMode.Map,
token,
subscriptionData.plan
);
break;
case RateLimiterMode.CrawlStatus: case RateLimiterMode.CrawlStatus:
rateLimiter = getRateLimiter(RateLimiterMode.CrawlStatus, token); rateLimiter = getRateLimiter(RateLimiterMode.CrawlStatus, token);
break; break;
@ -268,7 +301,7 @@ export async function supaAuthenticateUser(
return { return {
success: false, success: false,
error: `Rate limit exceeded. Consumed points: ${rateLimiterRes.consumedPoints}, Remaining points: ${rateLimiterRes.remainingPoints}. Upgrade your plan at https://firecrawl.dev/pricing for increased rate limits or please retry after ${secs}s, resets at ${retryDate}`, error: `Rate limit exceeded. Consumed (req/min): ${rateLimiterRes.consumedPoints}, Remaining (req/min): ${rateLimiterRes.remainingPoints}. Upgrade your plan at https://firecrawl.dev/pricing for increased rate limits or please retry after ${secs}s, resets at ${retryDate}`,
status: 429, status: 429,
}; };
} }
@ -277,6 +310,9 @@ export async function supaAuthenticateUser(
token === "this_is_just_a_preview_token" && token === "this_is_just_a_preview_token" &&
(mode === RateLimiterMode.Scrape || (mode === RateLimiterMode.Scrape ||
mode === RateLimiterMode.Preview || mode === RateLimiterMode.Preview ||
mode === RateLimiterMode.Map ||
mode === RateLimiterMode.Crawl ||
mode === RateLimiterMode.CrawlStatus ||
mode === RateLimiterMode.Search) mode === RateLimiterMode.Search)
) { ) {
return { success: true, team_id: "preview" }; return { success: true, team_id: "preview" };
@ -302,7 +338,10 @@ export async function supaAuthenticateUser(
.eq("key", normalizedApi); .eq("key", normalizedApi);
if (error || !data || data.length === 0) { if (error || !data || data.length === 0) {
Logger.warn(`Error fetching api key: ${error.message} or data is empty`); if (error) {
Sentry.captureException(error);
Logger.warn(`Error fetching api key: ${error.message} or data is empty`);
}
return { return {
success: false, success: false,
error: "Unauthorized: Invalid token", error: "Unauthorized: Invalid token",
@ -316,10 +355,10 @@ export async function supaAuthenticateUser(
return { return {
success: true, success: true,
team_id: subscriptionData.team_id, team_id: subscriptionData.team_id,
plan: subscriptionData.plan ?? "", plan: (subscriptionData.plan ?? "") as PlanType,
}; };
} }
function getPlanByPriceId(price_id: string) { function getPlanByPriceId(price_id: string): PlanType {
switch (price_id) { switch (price_id) {
case process.env.STRIPE_PRICE_ID_STARTER: case process.env.STRIPE_PRICE_ID_STARTER:
return "starter"; return "starter";
@ -336,6 +375,8 @@ function getPlanByPriceId(price_id: string) {
case process.env.STRIPE_PRICE_ID_GROWTH: case process.env.STRIPE_PRICE_ID_GROWTH:
case process.env.STRIPE_PRICE_ID_GROWTH_YEARLY: case process.env.STRIPE_PRICE_ID_GROWTH_YEARLY:
return "growth"; return "growth";
case process.env.STRIPE_PRICE_ID_GROWTH_DOUBLE_MONTHLY:
return "growthdouble";
default: default:
return "free"; return "free";
} }
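The 429 branch above reports consumed points, remaining points, a retry delay in seconds, and a reset time; those values come from the RateLimiterRes that rate-limiter-flexible rejects with when consume() exceeds the quota. A standalone sketch of that mechanism (the in-memory limiter and the 20 req/min figure are stand-ins, not the project's actual limits):

import { RateLimiterMemory, RateLimiterRes } from "rate-limiter-flexible";

const limiter = new RateLimiterMemory({ points: 20, duration: 60 }); // 20 requests per 60 s window

export async function consumeOrExplain(key: string): Promise<string | null> {
  try {
    await limiter.consume(key);
    return null; // request allowed
  } catch (rejection) {
    const r = rejection as RateLimiterRes;
    const secs = Math.round(r.msBeforeNext / 1000) || 1;
    const retryDate = new Date(Date.now() + r.msBeforeNext);
    return `Rate limit exceeded. Consumed (req/min): ${r.consumedPoints}, Remaining (req/min): ${r.remainingPoints}. Retry after ${secs}s, resets at ${retryDate}`;
  }
}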

View File

@ -1,51 +0,0 @@
import { Request, Response } from "express";
import { authenticateUser } from "./auth";
import { RateLimiterMode } from "../../src/types";
import { addWebScraperJob } from "../../src/services/queue-jobs";
import { getWebScraperQueue } from "../../src/services/queue-service";
import { supabaseGetJobById } from "../../src/lib/supabase-jobs";
import { Logger } from "../../src/lib/logger";
export async function crawlStatusController(req: Request, res: Response) {
try {
const { success, team_id, error, status } = await authenticateUser(
req,
res,
RateLimiterMode.CrawlStatus
);
if (!success) {
return res.status(status).json({ error });
}
const job = await getWebScraperQueue().getJob(req.params.jobId);
if (!job) {
return res.status(404).json({ error: "Job not found" });
}
const { current, current_url, total, current_step, partialDocs } = await job.progress();
let data = job.returnvalue;
if (process.env.USE_DB_AUTHENTICATION === "true") {
const supabaseData = await supabaseGetJobById(req.params.jobId);
if (supabaseData) {
data = supabaseData.docs;
}
}
const jobStatus = await job.getState();
res.json({
status: jobStatus,
// progress: job.progress(),
current,
current_url,
current_step,
total,
data: data ? data : null,
partial_data: jobStatus == 'completed' ? [] : partialDocs,
});
} catch (error) {
Logger.error(error);
return res.status(500).json({ error: error.message });
}
}
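The controller removed above read job state through the bull API (await job.progress(), job.returnvalue, job.getState()). With the switch to bullmq in this merge, the nearest equivalent lookup looks roughly like this sketch; the queue name and Redis connection are assumptions, and note that bullmq exposes progress as a property rather than a method:

import { Queue } from "bullmq";

const scrapeQueue = new Queue("scrapeQueue", {
  connection: { host: "localhost", port: 6379 }, // assumed local Redis
});

export async function getJobStatus(jobId: string) {
  const job = await scrapeQueue.getJob(jobId);
  if (!job) return null;
  const status = await job.getState();  // "completed" | "failed" | "active" | "waiting" | ...
  return {
    status,
    progress: job.progress,             // property in bullmq, not a function as in bull
    data: job.returnvalue ?? null,
  };
}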

View File

@ -1,110 +0,0 @@
import { Request, Response } from "express";
import { WebScraperDataProvider } from "../../src/scraper/WebScraper";
import { billTeam } from "../../src/services/billing/credit_billing";
import { checkTeamCredits } from "../../src/services/billing/credit_billing";
import { authenticateUser } from "./auth";
import { RateLimiterMode } from "../../src/types";
import { addWebScraperJob } from "../../src/services/queue-jobs";
import { isUrlBlocked } from "../../src/scraper/WebScraper/utils/blocklist";
import { logCrawl } from "../../src/services/logging/crawl_log";
import { validateIdempotencyKey } from "../../src/services/idempotency/validate";
import { createIdempotencyKey } from "../../src/services/idempotency/create";
import { defaultCrawlPageOptions, defaultCrawlerOptions, defaultOrigin } from "../../src/lib/default-values";
import { v4 as uuidv4 } from "uuid";
import { Logger } from "../../src/lib/logger";
export async function crawlController(req: Request, res: Response) {
try {
const { success, team_id, error, status } = await authenticateUser(
req,
res,
RateLimiterMode.Crawl
);
if (!success) {
return res.status(status).json({ error });
}
if (req.headers["x-idempotency-key"]) {
const isIdempotencyValid = await validateIdempotencyKey(req);
if (!isIdempotencyValid) {
return res.status(409).json({ error: "Idempotency key already used" });
}
try {
createIdempotencyKey(req);
} catch (error) {
Logger.error(error);
return res.status(500).json({ error: error.message });
}
}
const { success: creditsCheckSuccess, message: creditsCheckMessage } =
await checkTeamCredits(team_id, 1);
if (!creditsCheckSuccess) {
return res.status(402).json({ error: "Insufficient credits" });
}
const url = req.body.url;
if (!url) {
return res.status(400).json({ error: "Url is required" });
}
if (isUrlBlocked(url)) {
return res
.status(403)
.json({
error:
"Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.",
});
}
const mode = req.body.mode ?? "crawl";
const crawlerOptions = { ...defaultCrawlerOptions, ...req.body.crawlerOptions };
const pageOptions = { ...defaultCrawlPageOptions, ...req.body.pageOptions };
if (mode === "single_urls" && !url.includes(",")) { // NOTE: do we need this?
try {
const a = new WebScraperDataProvider();
await a.setOptions({
jobId: uuidv4(),
mode: "single_urls",
urls: [url],
crawlerOptions: { ...crawlerOptions, returnOnlyUrls: true },
pageOptions: pageOptions,
});
const docs = await a.getDocuments(false, (progress) => {
job.progress({
current: progress.current,
total: progress.total,
current_step: "SCRAPING",
current_url: progress.currentDocumentUrl,
});
});
return res.json({
success: true,
documents: docs,
});
} catch (error) {
Logger.error(error);
return res.status(500).json({ error: error.message });
}
}
const job = await addWebScraperJob({
url: url,
mode: mode ?? "crawl", // fix for single urls not working
crawlerOptions: crawlerOptions,
team_id: team_id,
pageOptions: pageOptions,
origin: req.body.origin ?? defaultOrigin,
});
await logCrawl(job.id.toString(), team_id);
res.json({ jobId: job.id });
} catch (error) {
Logger.error(error);
return res.status(500).json({ error: error.message });
}
}
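The deleted v0 controller enqueued crawls through addWebScraperJob on the bull queue. Under bullmq the enqueue step generally reduces to Queue.add; the sketch below is illustrative only, with the queue name, job name, and payload shape assumed rather than taken from this repository:

import { randomUUID } from "node:crypto";
import { Queue } from "bullmq";

const scrapeQueue = new Queue("scrapeQueue", {
  connection: { host: "localhost", port: 6379 }, // assumed local Redis
});

export async function enqueueCrawl(url: string, teamId: string) {
  const jobId = randomUUID(); // generate our own id so the API can return it immediately
  await scrapeQueue.add(
    "crawl",                                 // job name (illustrative)
    { url, mode: "crawl", team_id: teamId }, // payload shape is an assumption
    { jobId }
  );
  return jobId;
}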

View File

@ -1,46 +0,0 @@
import { Request, Response } from "express";
import { authenticateUser } from "./auth";
import { RateLimiterMode } from "../../src/types";
import { addWebScraperJob } from "../../src/services/queue-jobs";
import { isUrlBlocked } from "../../src/scraper/WebScraper/utils/blocklist";
import { Logger } from "../../src/lib/logger";
export async function crawlPreviewController(req: Request, res: Response) {
try {
const { success, team_id, error, status } = await authenticateUser(
req,
res,
RateLimiterMode.Preview
);
if (!success) {
return res.status(status).json({ error });
}
// authenticate on supabase
const url = req.body.url;
if (!url) {
return res.status(400).json({ error: "Url is required" });
}
if (isUrlBlocked(url)) {
return res.status(403).json({ error: "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it." });
}
const mode = req.body.mode ?? "crawl";
const crawlerOptions = req.body.crawlerOptions ?? {};
const pageOptions = req.body.pageOptions ?? { onlyMainContent: false, includeHtml: false, removeTags: [] };
const job = await addWebScraperJob({
url: url,
mode: mode ?? "crawl", // fix for single urls not working
crawlerOptions: { ...crawlerOptions, limit: 5, maxCrawledLinks: 5 },
team_id: "preview",
pageOptions: pageOptions,
origin: "website-preview",
});
res.json({ jobId: job.id });
} catch (error) {
Logger.error(error);
return res.status(500).json({ error: error.message });
}
}

View File

@ -1,196 +0,0 @@
import { ExtractorOptions, PageOptions } from './../lib/entities';
import { Request, Response } from "express";
import { WebScraperDataProvider } from "../scraper/WebScraper";
import { billTeam, checkTeamCredits } from "../services/billing/credit_billing";
import { authenticateUser } from "./auth";
import { RateLimiterMode } from "../types";
import { logJob } from "../services/logging/log_job";
import { Document } from "../lib/entities";
import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist"; // Import the isUrlBlocked function
import { numTokensFromString } from '../lib/LLM-extraction/helpers';
import { defaultPageOptions, defaultExtractorOptions, defaultTimeout, defaultOrigin } from '../lib/default-values';
import { v4 as uuidv4 } from "uuid";
import { Logger } from '../lib/logger';
export async function scrapeHelper(
jobId: string,
req: Request,
team_id: string,
crawlerOptions: any,
pageOptions: PageOptions,
extractorOptions: ExtractorOptions,
timeout: number,
plan?: string
): Promise<{
success: boolean;
error?: string;
data?: Document;
returnCode: number;
}> {
const url = req.body.url;
if (!url) {
return { success: false, error: "Url is required", returnCode: 400 };
}
if (isUrlBlocked(url)) {
return { success: false, error: "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.", returnCode: 403 };
}
const a = new WebScraperDataProvider();
await a.setOptions({
jobId,
mode: "single_urls",
urls: [url],
crawlerOptions: {
...crawlerOptions,
},
pageOptions: pageOptions,
extractorOptions: extractorOptions,
});
const timeoutPromise = new Promise<{ success: boolean; error?: string; returnCode: number }>((_, reject) =>
setTimeout(() => reject({ success: false, error: "Request timed out. Increase the timeout by passing `timeout` param to the request.", returnCode: 408 }), timeout)
);
const docsPromise = a.getDocuments(false);
let docs;
try {
docs = await Promise.race([docsPromise, timeoutPromise]);
} catch (error) {
return error;
}
// make sure doc.content is not empty
let filteredDocs = docs.filter(
(doc: { content?: string }) => doc.content && doc.content.trim().length > 0
);
if (filteredDocs.length === 0) {
return { success: true, error: "No page found", returnCode: 200, data: docs[0] };
}
// Remove rawHtml if pageOptions.rawHtml is false and extractorOptions.mode is llm-extraction-from-raw-html
if (!pageOptions.includeRawHtml && extractorOptions.mode == "llm-extraction-from-raw-html") {
filteredDocs.forEach(doc => {
delete doc.rawHtml;
});
}
return {
success: true,
data: filteredDocs[0],
returnCode: 200,
};
}
export async function scrapeController(req: Request, res: Response) {
try {
let earlyReturn = false;
// make sure to authenticate user first, Bearer <token>
const { success, team_id, error, status, plan } = await authenticateUser(
req,
res,
RateLimiterMode.Scrape
);
if (!success) {
return res.status(status).json({ error });
}
const crawlerOptions = req.body.crawlerOptions ?? {};
const pageOptions = { ...defaultPageOptions, ...req.body.pageOptions };
const extractorOptions = { ...defaultExtractorOptions, ...req.body.extractorOptions };
const origin = req.body.origin ?? defaultOrigin;
let timeout = req.body.timeout ?? defaultTimeout;
if (extractorOptions.mode.includes("llm-extraction")) {
pageOptions.onlyMainContent = true;
timeout = req.body.timeout ?? 90000;
}
const checkCredits = async () => {
try {
const { success: creditsCheckSuccess, message: creditsCheckMessage } = await checkTeamCredits(team_id, 1);
if (!creditsCheckSuccess) {
earlyReturn = true;
return res.status(402).json({ error: "Insufficient credits" });
}
} catch (error) {
Logger.error(error);
earlyReturn = true;
return res.status(500).json({ error: "Error checking team credits. Please contact hello@firecrawl.com for help." });
}
};
await checkCredits();
const jobId = uuidv4();
const startTime = new Date().getTime();
const result = await scrapeHelper(
jobId,
req,
team_id,
crawlerOptions,
pageOptions,
extractorOptions,
timeout,
plan
);
const endTime = new Date().getTime();
const timeTakenInSeconds = (endTime - startTime) / 1000;
const numTokens = (result.data && result.data.markdown) ? numTokensFromString(result.data.markdown, "gpt-3.5-turbo") : 0;
if (result.success) {
let creditsToBeBilled = 1; // Assuming 1 credit per document
const creditsPerLLMExtract = 50;
if (extractorOptions.mode.includes("llm-extraction")) {
// creditsToBeBilled = creditsToBeBilled + (creditsPerLLMExtract * filteredDocs.length);
creditsToBeBilled += creditsPerLLMExtract;
}
let startTimeBilling = new Date().getTime();
if (earlyReturn) {
// Don't bill if we're early returning
return;
}
const billingResult = await billTeam(
team_id,
creditsToBeBilled
);
if (!billingResult.success) {
return res.status(402).json({
success: false,
error: "Failed to bill team. Insufficient credits or subscription not found.",
});
}
}
logJob({
job_id: jobId,
success: result.success,
message: result.error,
num_docs: 1,
docs: [result.data],
time_taken: timeTakenInSeconds,
team_id: team_id,
mode: "scrape",
url: req.body.url,
crawlerOptions: crawlerOptions,
pageOptions: pageOptions,
origin: origin,
extractor_options: extractorOptions,
num_tokens: numTokens,
});
return res.status(result.returnCode).json(result);
} catch (error) {
Logger.error(error);
return res.status(500).json({ error: error.message });
}
}

View File

@ -1,42 +0,0 @@
import { Request, Response } from "express";
import { getWebScraperQueue } from "../../src/services/queue-service";
import { supabaseGetJobById } from "../../src/lib/supabase-jobs";
import { Logger } from "../../src/lib/logger";
export async function crawlJobStatusPreviewController(req: Request, res: Response) {
try {
const job = await getWebScraperQueue().getJob(req.params.jobId);
if (!job) {
return res.status(404).json({ error: "Job not found" });
}
const { current, current_url, total, current_step, partialDocs } = await job.progress();
let data = job.returnvalue;
if (process.env.USE_DB_AUTHENTICATION === "true") {
const supabaseData = await supabaseGetJobById(req.params.jobId);
if (supabaseData) {
data = supabaseData.docs;
}
}
let jobStatus = await job.getState();
if (jobStatus === 'waiting' || jobStatus === 'stuck') {
jobStatus = 'active';
}
res.json({
status: jobStatus,
// progress: job.progress(),
current,
current_url,
current_step,
total,
data: data ? data : null,
partial_data: jobStatus == 'completed' ? [] : partialDocs,
});
} catch (error) {
Logger.error(error);
return res.status(500).json({ error: error.message });
}
}

View File

@ -0,0 +1,199 @@
import { Request, Response } from "express";
import { Job } from "bullmq";
import { Logger } from "../../../lib/logger";
import { getScrapeQueue } from "../../../services/queue-service";
import { checkAlerts } from "../../../services/alerts";
import { sendSlackWebhook } from "../../../services/alerts/slack";
export async function cleanBefore24hCompleteJobsController(
req: Request,
res: Response
) {
Logger.info("🐂 Cleaning jobs older than 24h");
try {
const scrapeQueue = getScrapeQueue();
const batchSize = 10;
const numberOfBatches = 9; // Adjust based on your needs
const completedJobsPromises: Promise<Job[]>[] = [];
for (let i = 0; i < numberOfBatches; i++) {
completedJobsPromises.push(
scrapeQueue.getJobs(
["completed"],
i * batchSize,
i * batchSize + batchSize,
true
)
);
}
const completedJobs: Job[] = (
await Promise.all(completedJobsPromises)
).flat();
const before24hJobs =
completedJobs.filter(
(job) => job.finishedOn < Date.now() - 24 * 60 * 60 * 1000
) || [];
let count = 0;
if (!before24hJobs) {
return res.status(200).send(`No jobs to remove.`);
}
for (const job of before24hJobs) {
try {
await job.remove();
count++;
} catch (jobError) {
Logger.error(`🐂 Failed to remove job with ID ${job.id}: ${jobError}`);
}
}
return res.status(200).send(`Removed ${count} completed jobs.`);
} catch (error) {
Logger.error(`🐂 Failed to clean last 24h complete jobs: ${error}`);
return res.status(500).send("Failed to clean jobs");
}
}
export async function checkQueuesController(req: Request, res: Response) {
try {
await checkAlerts();
return res.status(200).send("Alerts initialized");
} catch (error) {
Logger.debug(`Failed to initialize alerts: ${error}`);
return res.status(500).send("Failed to initialize alerts");
}
}
// Use this as a "health check" that way we dont destroy the server
export async function queuesController(req: Request, res: Response) {
try {
const scrapeQueue = getScrapeQueue();
const [webScraperActive] = await Promise.all([
scrapeQueue.getActiveCount(),
]);
const noActiveJobs = webScraperActive === 0;
// 200 if no active jobs, 500 if there are active jobs
return res.status(noActiveJobs ? 200 : 500).json({
webScraperActive,
noActiveJobs,
});
} catch (error) {
Logger.error(error);
return res.status(500).json({ error: error.message });
}
}
export async function autoscalerController(req: Request, res: Response) {
try {
const maxNumberOfMachines = 80;
const minNumberOfMachines = 20;
const scrapeQueue = getScrapeQueue();
const [webScraperActive, webScraperWaiting, webScraperPriority] =
await Promise.all([
scrapeQueue.getActiveCount(),
scrapeQueue.getWaitingCount(),
scrapeQueue.getPrioritizedCount(),
]);
let waitingAndPriorityCount = webScraperWaiting + webScraperPriority;
// get number of machines active
const request = await fetch(
"https://api.machines.dev/v1/apps/firecrawl-scraper-js/machines",
{
headers: {
Authorization: `Bearer ${process.env.FLY_API_TOKEN}`,
},
}
);
const machines = await request.json();
// Only worker machines
const activeMachines = machines.filter(
(machine) =>
(machine.state === "started" ||
machine.state === "starting" ||
machine.state === "replacing") &&
machine.config.env["FLY_PROCESS_GROUP"] === "worker"
).length;
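// Tiered scaling: the larger the active count or the waiting/prioritized backlog, the more
// multiples of baseScaleUp are added; quiet periods subtract multiples of baseScaleDown,
// always clamped to the [minNumberOfMachines, maxNumberOfMachines] range.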
let targetMachineCount = activeMachines;
const baseScaleUp = 10;
// Slow scale down
const baseScaleDown = 2;
// Scale up logic
if (webScraperActive > 9000 || waitingAndPriorityCount > 2000) {
targetMachineCount = Math.min(
maxNumberOfMachines,
activeMachines + baseScaleUp * 3
);
} else if (webScraperActive > 5000 || waitingAndPriorityCount > 1000) {
targetMachineCount = Math.min(
maxNumberOfMachines,
activeMachines + baseScaleUp * 2
);
} else if (webScraperActive > 1000 || waitingAndPriorityCount > 500) {
targetMachineCount = Math.min(
maxNumberOfMachines,
activeMachines + baseScaleUp
);
}
// Scale down logic
if (webScraperActive < 100 && waitingAndPriorityCount < 50) {
targetMachineCount = Math.max(
minNumberOfMachines,
activeMachines - baseScaleDown * 3
);
} else if (webScraperActive < 500 && waitingAndPriorityCount < 200) {
targetMachineCount = Math.max(
minNumberOfMachines,
activeMachines - baseScaleDown * 2
);
} else if (webScraperActive < 1000 && waitingAndPriorityCount < 500) {
targetMachineCount = Math.max(
minNumberOfMachines,
activeMachines - baseScaleDown
);
}
if (targetMachineCount !== activeMachines) {
Logger.info(
`🐂 Scaling from ${activeMachines} to ${targetMachineCount} - ${webScraperActive} active, ${webScraperWaiting} waiting`
);
if (targetMachineCount > activeMachines) {
sendSlackWebhook(
`🐂 Scaling from ${activeMachines} to ${targetMachineCount} - ${webScraperActive} active, ${webScraperWaiting} waiting - Current DateTime: ${new Date().toISOString()}`,
false,
process.env.SLACK_AUTOSCALER ?? ""
);
} else {
sendSlackWebhook(
`🐂 Scaling from ${activeMachines} to ${targetMachineCount} - ${webScraperActive} active, ${webScraperWaiting} waiting - Current DateTime: ${new Date().toISOString()}`,
false,
process.env.SLACK_AUTOSCALER ?? ""
);
}
return res.status(200).json({
mode: "scale-descale",
count: targetMachineCount,
});
}
return res.status(200).json({
mode: "normal",
count: activeMachines,
});
} catch (error) {
Logger.error(error);
return res.status(500).send("Failed to initialize autoscaler");
}
}

View File

@ -1,7 +1,7 @@
import { Request, Response } from "express";
import Redis from "ioredis";
-import { Logger } from "../../lib/logger";
-import { redisRateLimitClient } from "../../services/rate-limiter";
+import { Logger } from "../../../lib/logger";
+import { redisRateLimitClient } from "../../../services/rate-limiter";
export async function redisHealthController(req: Request, res: Response) {
const retryOperation = async (operation, retries = 3) => {

View File

@ -1,11 +1,10 @@
import { Request, Response } from "express";
-import { authenticateUser } from "./auth";
-import { RateLimiterMode } from "../../src/types";
-import { addWebScraperJob } from "../../src/services/queue-jobs";
-import { getWebScraperQueue } from "../../src/services/queue-service";
-import { supabase_service } from "../../src/services/supabase";
-import { billTeam } from "../../src/services/billing/credit_billing";
-import { Logger } from "../../src/lib/logger";
+import { authenticateUser } from "../auth";
+import { RateLimiterMode } from "../../../src/types";
+import { supabase_service } from "../../../src/services/supabase";
+import { Logger } from "../../../src/lib/logger";
+import { getCrawl, saveCrawl } from "../../../src/lib/crawl-redis";
+import * as Sentry from "@sentry/node";
export async function crawlCancelController(req: Request, res: Response) {
try {
@ -19,8 +18,9 @@ export async function crawlCancelController(req: Request, res: Response) {
if (!success) {
return res.status(status).json({ error });
}
-const job = await getWebScraperQueue().getJob(req.params.jobId);
-if (!job) {
+const sc = await getCrawl(req.params.jobId);
+if (!sc) {
return res.status(404).json({ error: "Job not found" });
}
@ -40,31 +40,18 @@ export async function crawlCancelController(req: Request, res: Response) {
}
}
-const jobState = await job.getState();
-const { partialDocs } = await job.progress();
-if (partialDocs && partialDocs.length > 0 && jobState === "active") {
-Logger.info("Billing team for partial docs...");
-// Note: the credits that we will bill them here might be lower than the actual
-// due to promises that are not yet resolved
-await billTeam(team_id, partialDocs.length);
-}
try {
-await getWebScraperQueue().client.del(job.lockKey());
-await job.takeLock();
-await job.discard();
-await job.moveToFailed(Error("Job cancelled by user"), true);
+sc.cancelled = true;
+await saveCrawl(req.params.jobId, sc);
} catch (error) {
Logger.error(error);
}
-const newJobState = await job.getState();
res.json({
status: "cancelled"
});
} catch (error) {
+Sentry.captureException(error);
Logger.error(error);
return res.status(500).json({ error: error.message });
}

View File

@ -0,0 +1,71 @@
import { Request, Response } from "express";
import { authenticateUser } from "../auth";
import { RateLimiterMode } from "../../../src/types";
import { getScrapeQueue } from "../../../src/services/queue-service";
import { Logger } from "../../../src/lib/logger";
import { getCrawl, getCrawlJobs } from "../../../src/lib/crawl-redis";
import { supabaseGetJobsById } from "../../../src/lib/supabase-jobs";
import * as Sentry from "@sentry/node";
export async function getJobs(ids: string[]) {
const jobs = (await Promise.all(ids.map(x => getScrapeQueue().getJob(x)))).filter(x => x);
if (process.env.USE_DB_AUTHENTICATION === "true") {
const supabaseData = await supabaseGetJobsById(ids);
supabaseData.forEach(x => {
const job = jobs.find(y => y.id === x.job_id);
if (job) {
job.returnvalue = x.docs;
}
})
}
jobs.forEach(job => {
job.returnvalue = Array.isArray(job.returnvalue) ? job.returnvalue[0] : job.returnvalue;
});
return jobs;
}
export async function crawlStatusController(req: Request, res: Response) {
try {
const { success, team_id, error, status } = await authenticateUser(
req,
res,
RateLimiterMode.CrawlStatus
);
if (!success) {
return res.status(status).json({ error });
}
const sc = await getCrawl(req.params.jobId);
if (!sc) {
return res.status(404).json({ error: "Job not found" });
}
if (sc.team_id !== team_id) {
return res.status(403).json({ error: "Forbidden" });
}
const jobIDs = await getCrawlJobs(req.params.jobId);
const jobs = (await getJobs(jobIDs)).sort((a, b) => a.timestamp - b.timestamp);
const jobStatuses = await Promise.all(jobs.map(x => x.getState()));
const jobStatus = sc.cancelled ? "failed" : jobStatuses.every(x => x === "completed") ? "completed" : jobStatuses.some(x => x === "failed") ? "failed" : "active";
const data = jobs.map(x => Array.isArray(x.returnvalue) ? x.returnvalue[0] : x.returnvalue);
res.json({
status: jobStatus,
current: jobStatuses.filter(x => x === "completed" || x === "failed").length,
total: jobs.length,
data: jobStatus === "completed" ? data : null,
partial_data: jobStatus === "completed" ? [] : data.filter(x => x !== null),
});
} catch (error) {
Sentry.captureException(error);
Logger.error(error);
return res.status(500).json({ error: error.message });
}
}

View File

@ -0,0 +1,232 @@
import { Request, Response } from "express";
import { checkTeamCredits } from "../../../src/services/billing/credit_billing";
import { authenticateUser } from "../auth";
import { RateLimiterMode } from "../../../src/types";
import { addScrapeJob } from "../../../src/services/queue-jobs";
import { isUrlBlocked } from "../../../src/scraper/WebScraper/utils/blocklist";
import { logCrawl } from "../../../src/services/logging/crawl_log";
import { validateIdempotencyKey } from "../../../src/services/idempotency/validate";
import { createIdempotencyKey } from "../../../src/services/idempotency/create";
import { defaultCrawlPageOptions, defaultCrawlerOptions, defaultOrigin } from "../../../src/lib/default-values";
import { v4 as uuidv4 } from "uuid";
import { Logger } from "../../../src/lib/logger";
import { addCrawlJob, addCrawlJobs, crawlToCrawler, lockURL, lockURLs, saveCrawl, StoredCrawl } from "../../../src/lib/crawl-redis";
import { getScrapeQueue } from "../../../src/services/queue-service";
import { checkAndUpdateURL } from "../../../src/lib/validateUrl";
import * as Sentry from "@sentry/node";
import { getJobPriority } from "../../lib/job-priority";
export async function crawlController(req: Request, res: Response) {
try {
const { success, team_id, error, status, plan } = await authenticateUser(
req,
res,
RateLimiterMode.Crawl
);
if (!success) {
return res.status(status).json({ error });
}
if (req.headers["x-idempotency-key"]) {
const isIdempotencyValid = await validateIdempotencyKey(req);
if (!isIdempotencyValid) {
return res.status(409).json({ error: "Idempotency key already used" });
}
try {
createIdempotencyKey(req);
} catch (error) {
Logger.error(error);
return res.status(500).json({ error: error.message });
}
}
const crawlerOptions = {
...defaultCrawlerOptions,
...req.body.crawlerOptions,
};
const pageOptions = { ...defaultCrawlPageOptions, ...req.body.pageOptions };
if (Array.isArray(crawlerOptions.includes)) {
for (const x of crawlerOptions.includes) {
try {
new RegExp(x);
} catch (e) {
return res.status(400).json({ error: e.message });
}
}
}
if (Array.isArray(crawlerOptions.excludes)) {
for (const x of crawlerOptions.excludes) {
try {
new RegExp(x);
} catch (e) {
return res.status(400).json({ error: e.message });
}
}
}
const limitCheck = req.body?.crawlerOptions?.limit ?? 1;
const { success: creditsCheckSuccess, message: creditsCheckMessage, remainingCredits } =
await checkTeamCredits(team_id, limitCheck);
if (!creditsCheckSuccess) {
return res.status(402).json({ error: "Insufficient credits. You may be requesting with a higher limit than the amount of credits you have left. If not, upgrade your plan at https://firecrawl.dev/pricing or contact us at hello@firecrawl.com" });
}
// TODO: need to do this in v1 as well
crawlerOptions.limit = Math.min(remainingCredits, crawlerOptions.limit);
let url = req.body.url;
if (!url) {
return res.status(400).json({ error: "Url is required" });
}
if (typeof url !== "string") {
return res.status(400).json({ error: "URL must be a string" });
}
try {
url = checkAndUpdateURL(url).url;
} catch (e) {
return res
.status(e instanceof Error && e.message === "Invalid URL" ? 400 : 500)
.json({ error: e.message ?? e });
}
if (isUrlBlocked(url)) {
return res.status(403).json({
error:
"Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.",
});
}
// if (mode === "single_urls" && !url.includes(",")) { // NOTE: do we need this?
// try {
// const a = new WebScraperDataProvider();
// await a.setOptions({
// jobId: uuidv4(),
// mode: "single_urls",
// urls: [url],
// crawlerOptions: { ...crawlerOptions, returnOnlyUrls: true },
// pageOptions: pageOptions,
// });
// const docs = await a.getDocuments(false, (progress) => {
// job.updateProgress({
// current: progress.current,
// total: progress.total,
// current_step: "SCRAPING",
// current_url: progress.currentDocumentUrl,
// });
// });
// return res.json({
// success: true,
// documents: docs,
// });
// } catch (error) {
// Logger.error(error);
// return res.status(500).json({ error: error.message });
// }
// }
const id = uuidv4();
await logCrawl(id, team_id);
const sc: StoredCrawl = {
originUrl: url,
crawlerOptions,
pageOptions,
team_id,
plan,
createdAt: Date.now(),
};
const crawler = crawlToCrawler(id, sc);
try {
sc.robots = await crawler.getRobotsTxt();
} catch (_) {}
await saveCrawl(id, sc);
const sitemap = sc.crawlerOptions?.ignoreSitemap
? null
: await crawler.tryGetSitemap();
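// If a sitemap was found, enqueue every sitemapped URL as its own scrape job up front;
// otherwise enqueue only the origin URL and let the crawler discover links as it scrapes.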
if (sitemap !== null && sitemap.length > 0) {
let jobPriority = 20;
// If it is over 1000, we need to get the job priority,
// otherwise we can use the default priority of 20
if(sitemap.length > 1000){
// set base to 21
jobPriority = await getJobPriority({plan, team_id, basePriority: 21})
}
const jobs = sitemap.map((x) => {
const url = x.url;
const uuid = uuidv4();
return {
name: uuid,
data: {
url,
mode: "single_urls",
crawlerOptions: crawlerOptions,
team_id: team_id,
pageOptions: pageOptions,
origin: req.body.origin ?? defaultOrigin,
crawl_id: id,
sitemapped: true,
},
opts: {
jobId: uuid,
priority: jobPriority,
},
};
});
await lockURLs(
id,
jobs.map((x) => x.data.url)
);
await addCrawlJobs(
id,
jobs.map((x) => x.opts.jobId)
);
if (Sentry.isInitialized()) {
for (const job of jobs) {
// add with sentry instrumentation
await addScrapeJob(job.data as any, {}, job.opts.jobId);
}
} else {
await getScrapeQueue().addBulk(jobs);
}
} else {
await lockURL(id, sc, url);
// Not needed, first one should be 15.
// const jobPriority = await getJobPriority({plan, team_id, basePriority: 10})
const job = await addScrapeJob(
{
url,
mode: "single_urls",
crawlerOptions: crawlerOptions,
team_id: team_id,
pageOptions: pageOptions,
origin: req.body.origin ?? defaultOrigin,
crawl_id: id,
},
{
priority: 15, // prioritize request 0 of crawl jobs same as scrape jobs
}
);
await addCrawlJob(id, job.id);
}
res.json({ jobId: id });
} catch (error) {
Sentry.captureException(error);
Logger.error(error);
return res.status(500).json({ error: error.message });
}
}

View File

@ -0,0 +1,138 @@
import { Request, Response } from "express";
import { authenticateUser } from "../auth";
import { RateLimiterMode } from "../../../src/types";
import { isUrlBlocked } from "../../../src/scraper/WebScraper/utils/blocklist";
import { v4 as uuidv4 } from "uuid";
import { Logger } from "../../../src/lib/logger";
import { addCrawlJob, crawlToCrawler, lockURL, saveCrawl, StoredCrawl } from "../../../src/lib/crawl-redis";
import { addScrapeJob } from "../../../src/services/queue-jobs";
import { checkAndUpdateURL } from "../../../src/lib/validateUrl";
import * as Sentry from "@sentry/node";
export async function crawlPreviewController(req: Request, res: Response) {
try {
const { success, error, status, team_id:a, plan } = await authenticateUser(
req,
res,
RateLimiterMode.Preview
);
const team_id = "preview";
if (!success) {
return res.status(status).json({ error });
}
let url = req.body.url;
if (!url) {
return res.status(400).json({ error: "Url is required" });
}
try {
url = checkAndUpdateURL(url).url;
} catch (e) {
return res
.status(e instanceof Error && e.message === "Invalid URL" ? 400 : 500)
.json({ error: e.message ?? e });
}
if (isUrlBlocked(url)) {
return res
.status(403)
.json({
error:
"Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.",
});
}
const crawlerOptions = req.body.crawlerOptions ?? {};
const pageOptions = req.body.pageOptions ?? { onlyMainContent: false, includeHtml: false, removeTags: [] };
// if (mode === "single_urls" && !url.includes(",")) { // NOTE: do we need this?
// try {
// const a = new WebScraperDataProvider();
// await a.setOptions({
// jobId: uuidv4(),
// mode: "single_urls",
// urls: [url],
// crawlerOptions: { ...crawlerOptions, returnOnlyUrls: true },
// pageOptions: pageOptions,
// });
// const docs = await a.getDocuments(false, (progress) => {
// job.updateProgress({
// current: progress.current,
// total: progress.total,
// current_step: "SCRAPING",
// current_url: progress.currentDocumentUrl,
// });
// });
// return res.json({
// success: true,
// documents: docs,
// });
// } catch (error) {
// Logger.error(error);
// return res.status(500).json({ error: error.message });
// }
// }
const id = uuidv4();
const sc: StoredCrawl = {
originUrl: url,
crawlerOptions,
pageOptions,
team_id,
plan,
createdAt: Date.now(),
};
const crawler = crawlToCrawler(id, sc);
try {
// fetch robots.txt via the crawler (best-effort) so it is stored with the crawl
sc.robots = await crawler.getRobotsTxt();
} catch (_) {}
await saveCrawl(id, sc);
const sitemap = sc.crawlerOptions?.ignoreSitemap ? null : await crawler.tryGetSitemap();
if (sitemap !== null) {
for (const url of sitemap.map(x => x.url)) {
await lockURL(id, sc, url);
const job = await addScrapeJob({
url,
mode: "single_urls",
crawlerOptions: crawlerOptions,
team_id: team_id,
pageOptions: pageOptions,
origin: "website-preview",
crawl_id: id,
sitemapped: true,
});
await addCrawlJob(id, job.id);
}
} else {
await lockURL(id, sc, url);
const job = await addScrapeJob({
url,
mode: "single_urls",
crawlerOptions: crawlerOptions,
team_id: team_id,
pageOptions: pageOptions,
origin: "website-preview",
crawl_id: id,
});
await addCrawlJob(id, job.id);
}
res.json({ jobId: id });
} catch (error) {
Sentry.captureException(error);
Logger.error(error);
return res.status(500).json({ error: error.message });
}
}

View File

@ -1,8 +1,8 @@
-import { AuthResponse, RateLimiterMode } from "../types";
+import { AuthResponse, RateLimiterMode } from "../../types";
import { Request, Response } from "express";
-import { authenticateUser } from "./auth";
+import { authenticateUser } from "../auth";
export const keyAuthController = async (req: Request, res: Response) => {

View File

@ -0,0 +1,286 @@
import { ExtractorOptions, PageOptions } from "./../../lib/entities";
import { Request, Response } from "express";
import {
billTeam,
checkTeamCredits,
} from "../../services/billing/credit_billing";
import { authenticateUser } from "../auth";
import { PlanType, RateLimiterMode } from "../../types";
import { logJob } from "../../services/logging/log_job";
import { Document } from "../../lib/entities";
import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist"; // Import the isUrlBlocked function
import { numTokensFromString } from "../../lib/LLM-extraction/helpers";
import {
defaultPageOptions,
defaultExtractorOptions,
defaultTimeout,
defaultOrigin,
} from "../../lib/default-values";
import { addScrapeJob, waitForJob } from "../../services/queue-jobs";
import { getScrapeQueue } from "../../services/queue-service";
import { v4 as uuidv4 } from "uuid";
import { Logger } from "../../lib/logger";
import * as Sentry from "@sentry/node";
import { getJobPriority } from "../../lib/job-priority";
export async function scrapeHelper(
jobId: string,
req: Request,
team_id: string,
crawlerOptions: any,
pageOptions: PageOptions,
extractorOptions: ExtractorOptions,
timeout: number,
plan?: PlanType
): Promise<{
success: boolean;
error?: string;
data?: Document;
returnCode: number;
}> {
const url = req.body.url;
if (!url) {
return { success: false, error: "Url is required", returnCode: 400 };
}
if (isUrlBlocked(url)) {
return {
success: false,
error:
"Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.",
returnCode: 403,
};
}
const jobPriority = await getJobPriority({ plan, team_id, basePriority: 10 });
const job = await addScrapeJob(
{
url,
mode: "single_urls",
crawlerOptions,
team_id,
pageOptions,
extractorOptions,
origin: req.body.origin ?? defaultOrigin,
is_scrape: true,
},
{},
jobId,
jobPriority
);
let doc;
const err = await Sentry.startSpan(
{
name: "Wait for job to finish",
op: "bullmq.wait",
attributes: { job: jobId },
},
async (span) => {
try {
doc = (await waitForJob(job.id, timeout))[0];
} catch (e) {
if (e instanceof Error && e.message.startsWith("Job wait")) {
span.setAttribute("timedOut", true);
return {
success: false,
error: "Request timed out",
returnCode: 408,
};
} else if (
typeof e === "string" &&
(e.includes("Error generating completions: ") ||
e.includes("Invalid schema for function") ||
e.includes(
"LLM extraction did not match the extraction schema you provided."
))
) {
return {
success: false,
error: e,
returnCode: 500,
};
} else {
throw e;
}
}
span.setAttribute("result", JSON.stringify(doc));
return null;
}
);
if (err !== null) {
return err;
}
await job.remove();
if (!doc) {
console.error("!!! PANIC DOC IS", doc, job);
return {
success: true,
error: "No page found",
returnCode: 200,
data: doc,
};
}
delete doc.index;
delete doc.provider;
// Remove rawHtml if pageOptions.includeRawHtml is false and extractorOptions.mode is llm-extraction-from-raw-html
if (
!pageOptions.includeRawHtml &&
extractorOptions.mode == "llm-extraction-from-raw-html"
) {
if (doc.rawHtml) {
delete doc.rawHtml;
}
}
if (!pageOptions.includeHtml) {
if (doc.html) {
delete doc.html;
}
}
return {
success: true,
data: doc,
returnCode: 200,
};
}
export async function scrapeController(req: Request, res: Response) {
try {
let earlyReturn = false;
// make sure to authenticate user first, Bearer <token>
const { success, team_id, error, status, plan } = await authenticateUser(
req,
res,
RateLimiterMode.Scrape
);
if (!success) {
return res.status(status).json({ error });
}
const crawlerOptions = req.body.crawlerOptions ?? {};
const pageOptions = { ...defaultPageOptions, ...req.body.pageOptions };
const extractorOptions = {
...defaultExtractorOptions,
...req.body.extractorOptions,
};
const origin = req.body.origin ?? defaultOrigin;
let timeout = req.body.timeout ?? defaultTimeout;
if (extractorOptions.mode.includes("llm-extraction")) {
if (
typeof extractorOptions.extractionSchema !== "object" ||
extractorOptions.extractionSchema === null
) {
return res.status(400).json({
error:
"extractorOptions.extractionSchema must be an object if llm-extraction mode is specified",
});
}
pageOptions.onlyMainContent = true;
timeout = req.body.timeout ?? 90000;
}
// checkCredits
try {
const { success: creditsCheckSuccess, message: creditsCheckMessage } =
await checkTeamCredits(team_id, 1);
if (!creditsCheckSuccess) {
earlyReturn = true;
return res.status(402).json({ error: "Insufficient credits" });
}
} catch (error) {
Logger.error(error);
earlyReturn = true;
return res.status(500).json({
error:
"Error checking team credits. Please contact hello@firecrawl.com for help.",
});
}
const jobId = uuidv4();
const startTime = new Date().getTime();
const result = await scrapeHelper(
jobId,
req,
team_id,
crawlerOptions,
pageOptions,
extractorOptions,
timeout,
plan
);
const endTime = new Date().getTime();
const timeTakenInSeconds = (endTime - startTime) / 1000;
const numTokens =
result.data && result.data.markdown
? numTokensFromString(result.data.markdown, "gpt-3.5-turbo")
: 0;
if (result.success) {
let creditsToBeBilled = 1;
const creditsPerLLMExtract = 49;
if (extractorOptions.mode.includes("llm-extraction")) {
// creditsToBeBilled = creditsToBeBilled + (creditsPerLLMExtract * filteredDocs.length);
creditsToBeBilled += creditsPerLLMExtract;
}
let startTimeBilling = new Date().getTime();
if (earlyReturn) {
// Don't bill if we're early returning
return;
}
if (creditsToBeBilled > 0) {
// the base document credit is billed when the queue job finishes; bill only the LLM extraction credits here
const billingResult = await billTeam(team_id, creditsToBeBilled);
if (!billingResult.success) {
return res.status(402).json({
success: false,
error:
"Failed to bill team. Insufficient credits or subscription not found.",
});
}
}
}
logJob({
job_id: jobId,
success: result.success,
message: result.error,
num_docs: 1,
docs: [result.data],
time_taken: timeTakenInSeconds,
team_id: team_id,
mode: "scrape",
url: req.body.url,
crawlerOptions: crawlerOptions,
pageOptions: pageOptions,
origin: origin,
extractor_options: extractorOptions,
num_tokens: numTokens,
});
return res.status(result.returnCode).json(result);
} catch (error) {
Sentry.captureException(error);
Logger.error(error);
return res.status(500).json({
error:
typeof error === "string"
? error
: error?.message ?? "Internal Server Error",
});
}
}

View File

@ -1,14 +1,18 @@
import { Request, Response } from "express";
-import { WebScraperDataProvider } from "../scraper/WebScraper";
-import { billTeam, checkTeamCredits } from "../services/billing/credit_billing";
-import { authenticateUser } from "./auth";
-import { RateLimiterMode } from "../types";
-import { logJob } from "../services/logging/log_job";
-import { PageOptions, SearchOptions } from "../lib/entities";
-import { search } from "../search";
-import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist";
+import { WebScraperDataProvider } from "../../scraper/WebScraper";
+import { billTeam, checkTeamCredits } from "../../services/billing/credit_billing";
+import { authenticateUser } from "../auth";
+import { PlanType, RateLimiterMode } from "../../types";
+import { logJob } from "../../services/logging/log_job";
+import { PageOptions, SearchOptions } from "../../lib/entities";
+import { search } from "../../search";
+import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist";
import { v4 as uuidv4 } from "uuid";
-import { Logger } from "../lib/logger";
+import { Logger } from "../../lib/logger";
+import { getScrapeQueue } from "../../services/queue-service";
+import { addScrapeJob, waitForJob } from "../../services/queue-jobs";
+import * as Sentry from "@sentry/node";
+import { getJobPriority } from "../../lib/job-priority";
export async function searchHelper(
jobId: string,
@ -17,6 +21,7 @@ export async function searchHelper(
crawlerOptions: any,
pageOptions: PageOptions,
searchOptions: SearchOptions,
+plan: PlanType
): Promise<{
success: boolean;
error?: string;
@ -73,55 +78,57 @@ export async function searchHelper(
return { success: true, error: "No search results found", returnCode: 200 };
}
+const jobPriority = await getJobPriority({plan, team_id, basePriority: 20});
// filter out social media links
-const a = new WebScraperDataProvider();
-await a.setOptions({
-jobId,
-mode: "single_urls",
-urls: res.map((r) => r.url).slice(0, Math.min(searchOptions.limit ?? 5, 5)),
-crawlerOptions: {
-...crawlerOptions,
-},
-pageOptions: {
-...pageOptions,
-onlyMainContent: pageOptions?.onlyMainContent ?? true,
-fetchPageContent: pageOptions?.fetchPageContent ?? true,
-includeHtml: pageOptions?.includeHtml ?? false,
-removeTags: pageOptions?.removeTags ?? [],
-fallback: false,
-},
-});
-const docs = await a.getDocuments(false);
+const jobDatas = res.map(x => {
+const url = x.url;
+const uuid = uuidv4();
+return {
+name: uuid,
+data: {
+url,
+mode: "single_urls",
+crawlerOptions: crawlerOptions,
+team_id: team_id,
+pageOptions: pageOptions,
+},
+opts: {
+jobId: uuid,
+priority: jobPriority,
+}
+};
+})
+let jobs = [];
+if (Sentry.isInitialized()) {
+for (const job of jobDatas) {
+// add with sentry instrumentation
+jobs.push(await addScrapeJob(job.data as any, {}, job.opts.jobId));
+}
+} else {
+jobs = await getScrapeQueue().addBulk(jobDatas);
+await getScrapeQueue().addBulk(jobs);
+}
+const docs = (await Promise.all(jobs.map(x => waitForJob(x.id, 60000)))).map(x => x[0]);
if (docs.length === 0) {
return { success: true, error: "No search results found", returnCode: 200 };
}
+await Promise.all(jobs.map(x => x.remove()));
// make sure doc.content is not empty
const filteredDocs = docs.filter(
-(doc: { content?: string }) => doc.content && doc.content.trim().length > 0
+(doc: { content?: string }) => doc && doc.content && doc.content.trim().length > 0
);
if (filteredDocs.length === 0) {
return { success: true, error: "No page found", returnCode: 200, data: docs };
}
-const billingResult = await billTeam(
-team_id,
-filteredDocs.length
-);
-if (!billingResult.success) {
-return {
-success: false,
-error:
-"Failed to bill team. Insufficient credits or subscription not found.",
-returnCode: 402,
-};
-}
return {
success: true,
data: filteredDocs,
@ -132,7 +139,7 @@ export async function searchHelper(
export async function searchController(req: Request, res: Response) {
try {
// make sure to authenticate user first, Bearer <token>
-const { success, team_id, error, status } = await authenticateUser(
+const { success, team_id, error, status, plan } = await authenticateUser(
req,
res,
RateLimiterMode.Search
@ -142,17 +149,16 @@ export async function searchController(req: Request, res: Response) {
}
const crawlerOptions = req.body.crawlerOptions ?? {};
const pageOptions = req.body.pageOptions ?? {
-includeHtml: false,
-onlyMainContent: true,
-fetchPageContent: true,
-removeTags: [],
-fallback: false,
+includeHtml: req.body.pageOptions?.includeHtml ?? false,
+onlyMainContent: req.body.pageOptions?.onlyMainContent ?? false,
+fetchPageContent: req.body.pageOptions?.fetchPageContent ?? true,
+removeTags: req.body.pageOptions?.removeTags ?? [],
+fallback: req.body.pageOptions?.fallback ?? false,
};
const origin = req.body.origin ?? "api";
const searchOptions = req.body.searchOptions ?? { limit: 5 };
const jobId = uuidv4();
try {
@ -162,6 +168,7 @@ export async function searchController(req: Request, res: Response) {
return res.status(402).json({ error: "Insufficient credits" });
}
} catch (error) {
+Sentry.captureException(error);
Logger.error(error);
return res.status(500).json({ error: "Internal server error" });
}
@ -173,6 +180,7 @@ export async function searchController(req: Request, res: Response) {
crawlerOptions,
pageOptions,
searchOptions,
+plan
);
const endTime = new Date().getTime();
const timeTakenInSeconds = (endTime - startTime) / 1000;
@ -192,6 +200,11 @@ export async function searchController(req: Request, res: Response) {
});
return res.status(result.returnCode).json(result);
} catch (error) {
+if (error instanceof Error && error.message.startsWith("Job wait")) {
+return res.status(408).json({ error: "Request timed out" });
+}
+Sentry.captureException(error);
Logger.error(error);
return res.status(500).json({ error: error.message });
}

View File

@ -0,0 +1,43 @@
import { Request, Response } from "express";
import { Logger } from "../../../src/lib/logger";
import { getCrawl, getCrawlJobs } from "../../../src/lib/crawl-redis";
import { getJobs } from "./crawl-status";
import * as Sentry from "@sentry/node";
export async function crawlJobStatusPreviewController(req: Request, res: Response) {
try {
const sc = await getCrawl(req.params.jobId);
if (!sc) {
return res.status(404).json({ error: "Job not found" });
}
const jobIDs = await getCrawlJobs(req.params.jobId);
// let data = job.returnvalue;
// if (process.env.USE_DB_AUTHENTICATION === "true") {
// const supabaseData = await supabaseGetJobById(req.params.jobId);
// if (supabaseData) {
// data = supabaseData.docs;
// }
// }
const jobs = (await getJobs(jobIDs)).sort((a, b) => a.timestamp - b.timestamp);
const jobStatuses = await Promise.all(jobs.map(x => x.getState()));
const jobStatus = sc.cancelled ? "failed" : jobStatuses.every(x => x === "completed") ? "completed" : jobStatuses.some(x => x === "failed") ? "failed" : "active";
const data = jobs.map(x => Array.isArray(x.returnvalue) ? x.returnvalue[0] : x.returnvalue);
res.json({
status: jobStatus,
current: jobStatuses.filter(x => x === "completed" || x === "failed").length,
total: jobs.length,
data: jobStatus === "completed" ? data : null,
partial_data: jobStatus === "completed" ? [] : data.filter(x => x !== null),
});
} catch (error) {
Sentry.captureException(error);
Logger.error(error);
return res.status(500).json({ error: error.message });
}
}

View File

@ -0,0 +1,47 @@
import { crawlController } from '../crawl'
import { Request, Response } from 'express';
import { authenticateUser } from '../auth'; // Ensure this import is correct
import { createIdempotencyKey } from '../../services/idempotency/create';
import { validateIdempotencyKey } from '../../services/idempotency/validate';
import { v4 as uuidv4 } from 'uuid';
jest.mock('../auth', () => ({
authenticateUser: jest.fn().mockResolvedValue({
success: true,
team_id: 'team123',
error: null,
status: 200
}),
reduce: jest.fn()
}));
jest.mock('../../services/idempotency/validate');
describe('crawlController', () => {
it('should prevent duplicate requests using the same idempotency key', async () => {
const req = {
headers: {
'x-idempotency-key': await uuidv4(),
'Authorization': `Bearer ${process.env.TEST_API_KEY}`
},
body: {
url: 'https://mendable.ai'
}
} as unknown as Request;
const res = {
status: jest.fn().mockReturnThis(),
json: jest.fn()
} as unknown as Response;
// Mock the idempotency key validation to return false for the second call
(validateIdempotencyKey as jest.Mock).mockResolvedValueOnce(true).mockResolvedValueOnce(false);
// First request should succeed
await crawlController(req, res);
expect(res.status).not.toHaveBeenCalledWith(409);
// Second request with the same key should fail
await crawlController(req, res);
expect(res.status).toHaveBeenCalledWith(409);
expect(res.json).toHaveBeenCalledWith({ error: 'Idempotency key already used' });
});
});
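// For reference, a minimal client-side sketch of the behaviour exercised above, written as a
// comment so it stays out of the test run. The endpoint URL is illustrative; the header name
// and payload mirror the request object built in the test.
//
//   const key = uuidv4();
//   const call = () =>
//     fetch("https://api.firecrawl.dev/v0/crawl", {
//       method: "POST",
//       headers: {
//         "Content-Type": "application/json",
//         Authorization: `Bearer ${process.env.TEST_API_KEY}`,
//         "x-idempotency-key": key,
//       },
//       body: JSON.stringify({ url: "https://mendable.ai" }),
//     });
//   await call();                 // first request is accepted
//   const second = await call();  // second.status should be 409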

View File

@ -0,0 +1,64 @@
import { url } from "../types";
describe("URL Schema Validation", () => {
beforeEach(() => {
jest.resetAllMocks();
});
it("should prepend http:// to URLs without a protocol", () => {
const result = url.parse("example.com");
expect(result).toBe("http://example.com");
});
it("should allow valid URLs with http or https", () => {
expect(() => url.parse("http://example.com")).not.toThrow();
expect(() => url.parse("https://example.com")).not.toThrow();
});
it("should allow valid URLs with http or https", () => {
expect(() => url.parse("example.com")).not.toThrow();
});
it("should reject URLs with unsupported protocols", () => {
expect(() => url.parse("ftp://example.com")).toThrow("Invalid URL");
});
it("should reject URLs without a valid top-level domain", () => {
expect(() => url.parse("http://example")).toThrow("URL must have a valid top-level domain or be a valid path");
});
it("should reject blocked URLs", () => {
expect(() => url.parse("https://facebook.com")).toThrow("Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.");
});
it("should handle URLs with subdomains correctly", () => {
expect(() => url.parse("http://sub.example.com")).not.toThrow();
expect(() => url.parse("https://blog.example.com")).not.toThrow();
});
it("should handle URLs with paths correctly", () => {
expect(() => url.parse("http://example.com/path")).not.toThrow();
expect(() => url.parse("https://example.com/another/path")).not.toThrow();
});
it("should handle URLs with subdomains that are blocked", () => {
expect(() => url.parse("https://sub.facebook.com")).toThrow("Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.");
});
it("should handle URLs with paths that are blocked", () => {
expect(() => url.parse("http://facebook.com/path")).toThrow("Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.");
expect(() => url.parse("https://facebook.com/another/path")).toThrow("Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.");
});
it("should reject malformed URLs starting with 'http://http'", () => {
expect(() => url.parse("http://http://example.com")).toThrow("Invalid URL. Invalid protocol.");
});
it("should reject malformed URLs containing multiple 'http://'", () => {
expect(() => url.parse("http://example.com/http://example.com")).not.toThrow();
});
it("should reject malformed URLs containing multiple 'http://'", () => {
expect(() => url.parse("http://ex ample.com/")).toThrow("Invalid URL");
});
})

View File

@ -0,0 +1,58 @@
import { Request, Response } from "express";
import { authenticateUser } from "../auth";
import { RateLimiterMode } from "../../types";
import { supabase_service } from "../../services/supabase";
import { Logger } from "../../lib/logger";
import { getCrawl, saveCrawl } from "../../lib/crawl-redis";
import * as Sentry from "@sentry/node";
export async function crawlCancelController(req: Request, res: Response) {
try {
const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
const { success, team_id, error, status } = await authenticateUser(
req,
res,
RateLimiterMode.CrawlStatus
);
if (!success) {
return res.status(status).json({ error });
}
const sc = await getCrawl(req.params.jobId);
if (!sc) {
return res.status(404).json({ error: "Job not found" });
}
// check if the job belongs to the team
if (useDbAuthentication) {
const { data, error: supaError } = await supabase_service
.from("bulljobs_teams")
.select("*")
.eq("job_id", req.params.jobId)
.eq("team_id", team_id);
if (supaError) {
return res.status(500).json({ error: supaError.message });
}
if (data.length === 0) {
return res.status(403).json({ error: "Unauthorized" });
}
}
try {
sc.cancelled = true;
await saveCrawl(req.params.jobId, sc);
} catch (error) {
Logger.error(error);
}
res.json({
status: "cancelled"
});
} catch (error) {
Sentry.captureException(error);
Logger.error(error);
return res.status(500).json({ error: error.message });
}
}

View File

@ -0,0 +1,159 @@
import { authMiddleware } from "../../routes/v1";
import { RateLimiterMode } from "../../types";
import { authenticateUser } from "../auth";
import { CrawlStatusParams, CrawlStatusResponse, Document, ErrorResponse, legacyDocumentConverter, RequestWithAuth } from "./types";
import { WebSocket } from "ws";
import { v4 as uuidv4 } from "uuid";
import { Logger } from "../../lib/logger";
import { getCrawl, getCrawlExpiry, getCrawlJobs, getDoneJobsOrdered, getDoneJobsOrderedLength, isCrawlFinished, isCrawlFinishedLocked } from "../../lib/crawl-redis";
import { getScrapeQueue } from "../../services/queue-service";
import { getJob, getJobs } from "./crawl-status";
import * as Sentry from "@sentry/node";
type ErrorMessage = {
type: "error",
error: string,
}
type CatchupMessage = {
type: "catchup",
data: CrawlStatusResponse,
}
type DocumentMessage = {
type: "document",
data: Document,
}
type DoneMessage = { type: "done" }
type Message = ErrorMessage | CatchupMessage | DoneMessage | DocumentMessage;
function send(ws: WebSocket, msg: Message) {
if (ws.readyState === 1) {
return new Promise((resolve, reject) => {
ws.send(JSON.stringify(msg), (err) => {
if (err) reject(err);
else resolve(null);
});
});
}
}
function close(ws: WebSocket, code: number, msg: Message) {
if (ws.readyState <= 1) {
ws.close(code, JSON.stringify(msg));
}
}
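// Protocol sketch: on connect the server sends one "catchup" message with the crawl's current
// status and already-finished documents, then streams a "document" message per newly completed
// job (job states are polled every second), and finally closes with a "done" message, or an
// "error" message if a job failed.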
async function crawlStatusWS(ws: WebSocket, req: RequestWithAuth<CrawlStatusParams, undefined, undefined>) {
const sc = await getCrawl(req.params.jobId);
if (!sc) {
return close(ws, 1008, { type: "error", error: "Job not found" });
}
if (sc.team_id !== req.auth.team_id) {
return close(ws, 3003, { type: "error", error: "Forbidden" });
}
let doneJobIDs = [];
let finished = false;
const loop = async () => {
if (finished) return;
const jobIDs = await getCrawlJobs(req.params.jobId);
if (jobIDs.length === doneJobIDs.length) {
return close(ws, 1000, { type: "done" });
}
const notDoneJobIDs = jobIDs.filter(x => !doneJobIDs.includes(x));
const jobStatuses = await Promise.all(notDoneJobIDs.map(async x => [x, await getScrapeQueue().getJobState(x)]));
const newlyDoneJobIDs = jobStatuses.filter(x => x[1] === "completed" || x[1] === "failed").map(x => x[0]);
for (const jobID of newlyDoneJobIDs) {
const job = await getJob(jobID);
if (job.returnvalue) {
send(ws, {
type: "document",
data: legacyDocumentConverter(job.returnvalue),
})
} else {
return close(ws, 3000, { type: "error", error: job.failedReason });
}
}
setTimeout(loop, 1000);
};
setTimeout(loop, 1000);
doneJobIDs = await getDoneJobsOrdered(req.params.jobId);
const jobIDs = await getCrawlJobs(req.params.jobId);
const jobStatuses = await Promise.all(jobIDs.map(x => getScrapeQueue().getJobState(x)));
const status: Exclude<CrawlStatusResponse, ErrorResponse>["status"] = sc.cancelled ? "cancelled" : jobStatuses.every(x => x === "completed") ? "completed" : jobStatuses.some(x => x === "failed") ? "failed" : "scraping";
const doneJobs = await getJobs(doneJobIDs);
const data = doneJobs.map(x => x.returnvalue);
send(ws, {
type: "catchup",
data: {
status,
total: jobIDs.length,
completed: doneJobIDs.length,
creditsUsed: jobIDs.length,
expiresAt: (await getCrawlExpiry(req.params.jobId)).toISOString(),
data: data.map(x => legacyDocumentConverter(x)),
}
});
if (status !== "scraping") {
finished = true;
return close(ws, 1000, { type: "done" });
}
}
// Basically just middleware and error wrapping
export async function crawlStatusWSController(ws: WebSocket, req: RequestWithAuth<CrawlStatusParams, undefined, undefined>) {
try {
const { success, team_id, error, status, plan } = await authenticateUser(
req,
null,
RateLimiterMode.CrawlStatus,
);
if (!success) {
return close(ws, 3000, {
type: "error",
error,
});
}
req.auth = { team_id, plan };
await crawlStatusWS(ws, req);
} catch (err) {
Sentry.captureException(err);
const id = uuidv4();
let verbose = JSON.stringify(err);
if (verbose === "{}") {
if (err instanceof Error) {
verbose = JSON.stringify({
message: err.message,
name: err.name,
stack: err.stack,
});
}
}
Logger.error("Error occurred in WebSocket! (" + req.path + ") -- ID " + id + " -- " + verbose);
return close(ws, 1011, {
type: "error",
error: "An unexpected error occurred. Please contact hello@firecrawl.com for help. Your exception ID is " + id
});
}
}

View File

@ -0,0 +1,116 @@
import { Response } from "express";
import { CrawlStatusParams, CrawlStatusResponse, ErrorResponse, legacyDocumentConverter, RequestWithAuth } from "./types";
import { getCrawl, getCrawlExpiry, getCrawlJobs, getDoneJobsOrdered, getDoneJobsOrderedLength } from "../../lib/crawl-redis";
import { getScrapeQueue } from "../../services/queue-service";
import { supabaseGetJobById, supabaseGetJobsById } from "../../lib/supabase-jobs";
export async function getJob(id: string) {
const job = await getScrapeQueue().getJob(id);
if (!job) return job;
if (process.env.USE_DB_AUTHENTICATION === "true") {
const supabaseData = await supabaseGetJobById(id);
if (supabaseData) {
job.returnvalue = supabaseData.docs;
}
}
job.returnvalue = Array.isArray(job.returnvalue) ? job.returnvalue[0] : job.returnvalue;
return job;
}
export async function getJobs(ids: string[]) {
const jobs = (await Promise.all(ids.map(x => getScrapeQueue().getJob(x)))).filter(x => x);
if (process.env.USE_DB_AUTHENTICATION === "true") {
const supabaseData = await supabaseGetJobsById(ids);
supabaseData.forEach(x => {
const job = jobs.find(y => y.id === x.job_id);
if (job) {
job.returnvalue = x.docs;
}
})
}
jobs.forEach(job => {
job.returnvalue = Array.isArray(job.returnvalue) ? job.returnvalue[0] : job.returnvalue;
});
return jobs;
}
export async function crawlStatusController(req: RequestWithAuth<CrawlStatusParams, undefined, CrawlStatusResponse>, res: Response<CrawlStatusResponse>) {
const sc = await getCrawl(req.params.jobId);
if (!sc) {
return res.status(404).json({ success: false, error: "Job not found" });
}
if (sc.team_id !== req.auth.team_id) {
return res.status(403).json({ success: false, error: "Forbidden" });
}
const start = typeof req.query.skip === "string" ? parseInt(req.query.skip, 10) : 0;
const end = typeof req.query.limit === "string" ? (start + parseInt(req.query.limit, 10) - 1) : undefined;
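// Pagination: `skip` is the index of the first completed job to return and `limit` caps the page
// size. `end` is inclusive and stays undefined when no limit is given, in which case the response
// is bounded by the ~10 MiB size check below instead.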
const jobIDs = await getCrawlJobs(req.params.jobId);
const jobStatuses = await Promise.all(jobIDs.map(x => getScrapeQueue().getJobState(x)));
const status: Exclude<CrawlStatusResponse, ErrorResponse>["status"] = sc.cancelled ? "cancelled" : jobStatuses.every(x => x === "completed") ? "completed" : jobStatuses.some(x => x === "failed") ? "failed" : "scraping";
const doneJobsLength = await getDoneJobsOrderedLength(req.params.jobId);
const doneJobsOrder = await getDoneJobsOrdered(req.params.jobId, start, end ?? -1);
let doneJobs = [];
if (end === undefined) { // no limit given: cap the response at ~10 MiB instead
let bytes = 0;
const bytesLimit = 10485760; // 10 MiB in bytes
const factor = 100; // chunking for faster retrieval
for (let i = 0; i < doneJobsOrder.length && bytes < bytesLimit; i += factor) {
// get current chunk and retrieve jobs
const currentIDs = doneJobsOrder.slice(i, i+factor);
const jobs = await getJobs(currentIDs);
// iterate through jobs and add them one by one to the byte counter
// both loops break once we exceed the byte limit
for (let ii = 0; ii < jobs.length && bytes < bytesLimit; ii++) {
const job = jobs[ii];
doneJobs.push(job);
bytes += JSON.stringify(legacyDocumentConverter(job.returnvalue)).length;
}
}
// if we ran over the bytes limit, remove the last document
if (bytes > bytesLimit) {
doneJobs.splice(doneJobs.length - 1, 1);
}
} else {
doneJobs = await getJobs(doneJobsOrder);
}
const data = doneJobs.map(x => x.returnvalue);
const nextURL = new URL(`${req.protocol}://${req.get("host")}/v1/crawl/${req.params.jobId}`);
nextURL.searchParams.set("skip", (start + data.length).toString());
if (typeof req.query.limit === "string") {
nextURL.searchParams.set("limit", req.query.limit);
}
res.status(200).json({
status,
completed: doneJobsLength,
total: jobIDs.length,
creditsUsed: jobIDs.length,
expiresAt: (await getCrawlExpiry(req.params.jobId)).toISOString(),
next:
status !== "scraping" && (start + data.length) === doneJobsLength // if there's not gonna be any documents after this
? undefined
: nextURL.href,
data: data.map(x => legacyDocumentConverter(x)),
});
}

View File

@ -0,0 +1,157 @@
import { Response } from "express";
import { v4 as uuidv4 } from "uuid";
import {
CrawlRequest,
crawlRequestSchema,
CrawlResponse,
legacyCrawlerOptions,
legacyScrapeOptions,
RequestWithAuth,
} from "./types";
import {
addCrawlJob,
addCrawlJobs,
crawlToCrawler,
lockURL,
lockURLs,
saveCrawl,
StoredCrawl,
} from "../../lib/crawl-redis";
import { logCrawl } from "../../services/logging/crawl_log";
import { getScrapeQueue } from "../../services/queue-service";
import { addScrapeJob } from "../../services/queue-jobs";
import { Logger } from "../../lib/logger";
import { getJobPriority } from "../../lib/job-priority";
export async function crawlController(
req: RequestWithAuth<{}, CrawlResponse, CrawlRequest>,
res: Response<CrawlResponse>
) {
req.body = crawlRequestSchema.parse(req.body);
const id = uuidv4();
await logCrawl(id, req.auth.team_id);
const { remainingCredits } = req.account;
const crawlerOptions = legacyCrawlerOptions(req.body);
const pageOptions = legacyScrapeOptions(req.body.scrapeOptions);
// TODO: @rafa, is this right? copied from v0
if (Array.isArray(crawlerOptions.includes)) {
for (const x of crawlerOptions.includes) {
try {
new RegExp(x);
} catch (e) {
return res.status(400).json({ success: false, error: e.message });
}
}
}
if (Array.isArray(crawlerOptions.excludes)) {
for (const x of crawlerOptions.excludes) {
try {
new RegExp(x);
} catch (e) {
return res.status(400).json({ success: false, error: e.message });
}
}
}
crawlerOptions.limit = Math.min(remainingCredits, crawlerOptions.limit);
const sc: StoredCrawl = {
originUrl: req.body.url,
crawlerOptions,
pageOptions,
team_id: req.auth.team_id,
createdAt: Date.now(),
plan: req.auth.plan,
};
const crawler = crawlToCrawler(id, sc);
try {
sc.robots = await crawler.getRobotsTxt();
} catch (e) {
Logger.debug(
`[Crawl] Failed to get robots.txt (this is probably fine!): ${JSON.stringify(
e
)}`
);
}
await saveCrawl(id, sc);
const sitemap = sc.crawlerOptions.ignoreSitemap
? null
: await crawler.tryGetSitemap();
if (sitemap !== null && sitemap.length > 0) {
let jobPriority = 20;
// If it is over 1000, we need to get the job priority,
// otherwise we can use the default priority of 20
if(sitemap.length > 1000){
// set base to 21
jobPriority = await getJobPriority({plan: req.auth.plan, team_id: req.auth.team_id, basePriority: 21})
}
const jobs = sitemap.map((x) => {
const url = x.url;
const uuid = uuidv4();
return {
name: uuid,
data: {
url,
mode: "single_urls",
team_id: req.auth.team_id,
crawlerOptions,
pageOptions,
origin: "api",
crawl_id: id,
sitemapped: true,
v1: true,
},
opts: {
jobId: uuid,
priority: 20,
},
};
});
await lockURLs(
id,
jobs.map((x) => x.data.url)
);
await addCrawlJobs(
id,
jobs.map((x) => x.opts.jobId)
);
await getScrapeQueue().addBulk(jobs);
} else {
await lockURL(id, sc, req.body.url);
const job = await addScrapeJob(
{
url: req.body.url,
mode: "single_urls",
crawlerOptions: crawlerOptions,
team_id: req.auth.team_id,
pageOptions: pageOptions,
origin: "api",
crawl_id: id,
webhook: req.body.webhook,
v1: true,
},
{
priority: 15,
}
);
await addCrawlJob(id, job.id);
}
return res.status(200).json({
success: true,
id,
url: `${req.protocol}://${req.get("host")}/v1/crawl/${id}`,
});
}
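// A client-side sketch of the flow this controller implements: POST the crawl, then poll the
// returned status URL until it leaves "scraping". The host and the API-key environment variable
// name are illustrative, not part of this codebase.
//
//   const started = await fetch("https://api.firecrawl.dev/v1/crawl", {
//     method: "POST",
//     headers: {
//       "Content-Type": "application/json",
//       Authorization: `Bearer ${process.env.FIRECRAWL_API_KEY}`,
//     },
//     body: JSON.stringify({ url: "https://example.com" }),
//   });
//   const { id, url } = await started.json(); // url -> /v1/crawl/{id}
//   const status = await (
//     await fetch(url, { headers: { Authorization: `Bearer ${process.env.FIRECRAWL_API_KEY}` } })
//   ).json();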

View File

@ -0,0 +1,6 @@
import { Request, Response } from "express";
export async function livenessController(req: Request, res: Response) {
// TODO: add liveness checks, e.g. verify that the Redis connection is healthy
res.status(200).json({ status: "ok" });
}

View File

@ -0,0 +1,130 @@
import { Response } from "express";
import { v4 as uuidv4 } from "uuid";
import {
legacyCrawlerOptions,
mapRequestSchema,
RequestWithAuth,
} from "./types";
import { crawlToCrawler, StoredCrawl } from "../../lib/crawl-redis";
import { MapResponse, MapRequest } from "./types";
import { configDotenv } from "dotenv";
import {
checkAndUpdateURLForMap,
isSameDomain,
isSameSubdomain,
removeDuplicateUrls,
} from "../../lib/validateUrl";
import { fireEngineMap } from "../../search/fireEngine";
import { billTeam } from "../../services/billing/credit_billing";
import { logJob } from "../../services/logging/log_job";
import { performCosineSimilarity } from "../../lib/map-cosine";
configDotenv();
export async function mapController(
req: RequestWithAuth<{}, MapResponse, MapRequest>,
res: Response<MapResponse>
) {
const startTime = new Date().getTime();
req.body = mapRequestSchema.parse(req.body);
const limit = req.body.limit;
const id = uuidv4();
let links: string[] = [req.body.url];
const sc: StoredCrawl = {
originUrl: req.body.url,
crawlerOptions: legacyCrawlerOptions(req.body),
pageOptions: {},
team_id: req.auth.team_id,
createdAt: Date.now(),
plan: req.auth.plan,
};
const crawler = crawlToCrawler(id, sc);
const sitemap = req.body.ignoreSitemap ? null : await crawler.tryGetSitemap();
if (sitemap !== null) {
sitemap.map((x) => {
links.push(x.url);
});
}
let urlWithoutWww = req.body.url.replace("www.", "");
let mapUrl = req.body.search
? `"${req.body.search}" site:${urlWithoutWww}`
: `site:${req.body.url}`;
// www. seems to exclude subdomains in some cases
const mapResults = await fireEngineMap(mapUrl, {
// limit to 50 results (beta)
numResults: Math.min(limit, 50),
});
if (mapResults.length > 0) {
if (req.body.search) {
// Ensure all map results are first, maintaining their order
links = [
mapResults[0].url,
...mapResults.slice(1).map((x) => x.url),
...links,
];
} else {
mapResults.map((x) => {
links.push(x.url);
});
}
}
// Perform cosine similarity between the search query and the list of links
if (req.body.search) {
const searchQuery = req.body.search.toLowerCase();
links = performCosineSimilarity(links, searchQuery);
}
links = links.map((x) => checkAndUpdateURLForMap(x).url.trim());
// allows for subdomains to be included
links = links.filter((x) => isSameDomain(x, req.body.url));
// if includeSubdomains is false, filter out subdomains
if (!req.body.includeSubdomains) {
links = links.filter((x) => isSameSubdomain(x, req.body.url));
}
// remove duplicates that could be due to http/https or www
links = removeDuplicateUrls(links);
await billTeam(req.auth.team_id, 1);
const endTime = new Date().getTime();
const timeTakenInSeconds = (endTime - startTime) / 1000;
const linksToReturn = links.slice(0, limit);
logJob({
job_id: id,
success: links.length > 0,
message: "Map completed",
num_docs: links.length,
docs: linksToReturn,
time_taken: timeTakenInSeconds,
team_id: req.auth.team_id,
mode: "map",
url: req.body.url,
crawlerOptions: {},
pageOptions: {},
origin: req.body.origin,
extractor_options: { mode: "markdown" },
num_tokens: 0,
});
return res.status(200).json({
success: true,
links: linksToReturn,
});
}
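A request/response sketch for the endpoint above. The body follows mapRequestSchema (note the 1-50 bound on limit); host and key are placeholders:
// Hypothetical call to POST /v1/map; host and key are placeholders.
const res = await fetch("http://localhost:3002/v1/map", {
  method: "POST",
  headers: { "Content-Type": "application/json", Authorization: "Bearer fc-YOUR-KEY" },
  body: JSON.stringify({
    url: "https://example.com",
    search: "docs",           // optional: ranks links by cosine similarity to the query
    includeSubdomains: false, // drop links that are not on the exact same subdomain
    limit: 30,
  }),
});
const body = await res.json();
if (body.success) console.log(body.links.slice(0, 5));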

View File

@ -0,0 +1,6 @@
import { Request, Response } from "express";
export async function readinessController(req: Request, res: Response) {
// TODO: add checks that verify the application is ready to serve traffic
res.status(200).json({ status: "ok" });
}

View File

@ -0,0 +1,109 @@
import { Request, Response } from "express";
import { Logger } from '../../lib/logger';
import { Document, legacyDocumentConverter, legacyScrapeOptions, RequestWithAuth, ScrapeRequest, scrapeRequestSchema, ScrapeResponse } from "./types";
import { billTeam } from "../../services/billing/credit_billing";
import { v4 as uuidv4 } from 'uuid';
import { numTokensFromString } from "../../lib/LLM-extraction/helpers";
import { addScrapeJob, waitForJob } from "../../services/queue-jobs";
import { logJob } from "../../services/logging/log_job";
import { getJobPriority } from "../../lib/job-priority";
import { PlanType } from "../../types";
export async function scrapeController(req: RequestWithAuth<{}, ScrapeResponse, ScrapeRequest>, res: Response<ScrapeResponse>) {
req.body = scrapeRequestSchema.parse(req.body);
let earlyReturn = false;
const origin = req.body.origin;
const timeout = req.body.timeout;
const pageOptions = legacyScrapeOptions(req.body);
const jobId = uuidv4();
const startTime = new Date().getTime();
const jobPriority = await getJobPriority({plan: req.auth.plan as PlanType, team_id: req.auth.team_id, basePriority: 10})
const job = await addScrapeJob({
url: req.body.url,
mode: "single_urls",
crawlerOptions: {},
team_id: req.auth.team_id,
pageOptions,
extractorOptions: {},
origin: req.body.origin,
is_scrape: true,
}, {}, jobId, jobPriority);
let doc: any | undefined;
try {
doc = (await waitForJob(job.id, timeout))[0];
} catch (e) {
Logger.error(`Error in scrapeController: ${e}`);
if (e instanceof Error && e.message.startsWith("Job wait")) {
return res.status(408).json({
success: false,
error: "Request timed out",
});
} else {
return res.status(500).json({
success: false,
error: "Internal server error",
});
}
}
await job.remove();
if (!doc) {
console.error("!!! PANIC DOC IS", doc, job);
return res.status(200).json({
success: true,
warning: "No page found",
data: doc
});
}
delete doc.index;
delete doc.provider;
const endTime = new Date().getTime();
const timeTakenInSeconds = (endTime - startTime) / 1000;
const numTokens = (doc && doc.markdown) ? numTokensFromString(doc.markdown, "gpt-3.5-turbo") : 0;
let creditsToBeBilled = 1; // Assuming 1 credit per document
if (earlyReturn) {
// Don't bill if we're early returning
return;
}
const billingResult = await billTeam(
req.auth.team_id,
creditsToBeBilled
);
if (!billingResult.success) {
return res.status(402).json({
success: false,
error: "Failed to bill team. Insufficient credits or subscription not found.",
});
}
logJob({
job_id: jobId,
success: true,
message: "Scrape completed",
num_docs: 1,
docs: [doc],
time_taken: timeTakenInSeconds,
team_id: req.auth.team_id,
mode: "scrape",
url: req.body.url,
crawlerOptions: {},
pageOptions: pageOptions,
origin: origin,
extractor_options: { mode: "markdown" },
num_tokens: numTokens,
});
return res.status(200).json({
success: true,
data: legacyDocumentConverter(doc),
});
}
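The synchronous scrape path returns one Document (via legacyDocumentConverter) or a 408 when waitForJob exceeds the timeout. A minimal client sketch with placeholder host and key:
// Hypothetical call to POST /v1/scrape; host and key are placeholders.
const res = await fetch("http://localhost:3002/v1/scrape", {
  method: "POST",
  headers: { "Content-Type": "application/json", Authorization: "Bearer fc-YOUR-KEY" },
  body: JSON.stringify({
    url: "https://example.com",
    formats: ["markdown", "links"],
    onlyMainContent: true,
    timeout: 30000,
  }),
});
if (res.status === 408) {
  console.error("Request timed out");
} else {
  const body = await res.json();
  if (body.success) console.log(body.data.metadata.statusCode, body.data.markdown?.length);
}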

View File

@ -0,0 +1,321 @@
import { Request, Response } from "express";
import { z } from "zod";
import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist";
import { PageOptions } from "../../lib/entities";
import { protocolIncluded, checkUrl } from "../../lib/validateUrl";
import { PlanType } from "../../types";
export type Format =
| "markdown"
| "html"
| "rawHtml"
| "links"
| "screenshot"
| "screenshot@fullPage";
export const url = z.preprocess(
(x) => {
if (!protocolIncluded(x as string)) {
return `http://${x}`;
}
return x;
},
z
.string()
.url()
.regex(/^https?:\/\//, "URL uses unsupported protocol")
.refine(
(x) => /\.[a-z]{2,}(\/|$)/i.test(x),
"URL must have a valid top-level domain or be a valid path"
)
.refine(
(x) => checkUrl(x as string),
"Invalid URL"
)
.refine(
(x) => !isUrlBlocked(x as string),
"Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it."
)
);
const strictMessage = "Unrecognized key in body -- please review the v1 API documentation for request body changes";
export const scrapeOptions = z.object({
formats: z
.enum([
"markdown",
"html",
"rawHtml",
"links",
"screenshot",
"screenshot@fullPage",
])
.array()
.optional()
.default(["markdown"]),
headers: z.record(z.string(), z.string()).optional(),
includeTags: z.string().array().optional(),
excludeTags: z.string().array().optional(),
onlyMainContent: z.boolean().default(true),
timeout: z.number().int().positive().finite().safe().default(30000), // default?
waitFor: z.number().int().nonnegative().finite().safe().default(0),
parsePDF: z.boolean().default(true),
}).strict(strictMessage);
export type ScrapeOptions = z.infer<typeof scrapeOptions>;
export const scrapeRequestSchema = scrapeOptions.extend({
url,
origin: z.string().optional().default("api"),
}).strict(strictMessage);
// export type ScrapeRequest = {
// url: string;
// formats?: Format[];
// headers?: { [K: string]: string };
// includeTags?: string[];
// excludeTags?: string[];
// onlyMainContent?: boolean;
// timeout?: number;
// waitFor?: number;
// }
export type ScrapeRequest = z.infer<typeof scrapeRequestSchema>;
const crawlerOptions = z.object({
includePaths: z.string().array().default([]),
excludePaths: z.string().array().default([]),
maxDepth: z.number().default(10), // default?
limit: z.number().default(10000), // default?
allowBackwardLinks: z.boolean().default(false), // >> TODO: CHANGE THIS NAME???
allowExternalLinks: z.boolean().default(false),
ignoreSitemap: z.boolean().default(true),
}).strict(strictMessage);
// export type CrawlerOptions = {
// includePaths?: string[];
// excludePaths?: string[];
// maxDepth?: number;
// limit?: number;
// allowBackwardLinks?: boolean; // >> TODO: CHANGE THIS NAME???
// allowExternalLinks?: boolean;
// ignoreSitemap?: boolean;
// };
export type CrawlerOptions = z.infer<typeof crawlerOptions>;
export const crawlRequestSchema = crawlerOptions.extend({
url,
origin: z.string().optional().default("api"),
scrapeOptions: scrapeOptions.omit({ timeout: true }).default({}),
webhook: z.string().url().optional(),
limit: z.number().default(10000),
}).strict(strictMessage);
// export type CrawlRequest = {
// url: string;
// crawlerOptions?: CrawlerOptions;
// scrapeOptions?: Exclude<ScrapeRequest, "url">;
// };
export type CrawlRequest = z.infer<typeof crawlRequestSchema>;
export const mapRequestSchema = crawlerOptions.extend({
url,
origin: z.string().optional().default("api"),
includeSubdomains: z.boolean().default(true),
search: z.string().optional(),
ignoreSitemap: z.boolean().default(false),
limit: z.number().min(1).max(50).default(5000).optional(),
}).strict(strictMessage);
// export type MapRequest = {
// url: string;
// crawlerOptions?: CrawlerOptions;
// };
export type MapRequest = z.infer<typeof mapRequestSchema>;
export type Document = {
markdown?: string;
html?: string;
rawHtml?: string;
links?: string[];
screenshot?: string;
metadata: {
title?: string;
description?: string;
language?: string;
keywords?: string;
robots?: string;
ogTitle?: string;
ogDescription?: string;
ogUrl?: string;
ogImage?: string;
ogAudio?: string;
ogDeterminer?: string;
ogLocale?: string;
ogLocaleAlternate?: string[];
ogSiteName?: string;
ogVideo?: string;
dcTermsCreated?: string;
dcDateCreated?: string;
dcDate?: string;
dcTermsType?: string;
dcType?: string;
dcTermsAudience?: string;
dcTermsSubject?: string;
dcSubject?: string;
dcDescription?: string;
dcTermsKeywords?: string;
modifiedTime?: string;
publishedTime?: string;
articleTag?: string;
articleSection?: string;
sourceURL?: string;
statusCode?: number;
error?: string;
};
};
export type ErrorResponse = {
success: false;
error: string;
details?: any;
};
export type ScrapeResponse =
| ErrorResponse
| {
success: true;
warning?: string;
data: Document;
};
export interface ScrapeResponseRequestTest {
statusCode: number;
body: ScrapeResponse;
error?: string;
}
export type CrawlResponse =
| ErrorResponse
| {
success: true;
id: string;
url: string;
};
export type MapResponse =
| ErrorResponse
| {
success: true;
links: string[];
};
export type CrawlStatusParams = {
jobId: string;
};
export type CrawlStatusResponse =
| ErrorResponse
| {
status: "scraping" | "completed" | "failed" | "cancelled";
completed: number;
total: number;
creditsUsed: number;
expiresAt: string;
next?: string;
data: Document[];
};
type AuthObject = {
team_id: string;
plan: PlanType;
};
type Account = {
remainingCredits: number;
};
export interface RequestWithMaybeAuth<
ReqParams = {},
ReqBody = undefined,
ResBody = undefined
> extends Request<ReqParams, ReqBody, ResBody> {
auth?: AuthObject;
account?: Account;
}
export interface RequestWithAuth<
ReqParams = {},
ReqBody = undefined,
ResBody = undefined,
> extends Request<ReqParams, ReqBody, ResBody> {
auth: AuthObject;
account?: Account;
}
export interface ResponseWithSentry<
ResBody = undefined,
> extends Response<ResBody> {
sentry?: string,
}
export function legacyCrawlerOptions(x: CrawlerOptions) {
return {
includes: x.includePaths,
excludes: x.excludePaths,
maxCrawledLinks: x.limit,
maxCrawledDepth: x.maxDepth,
limit: x.limit,
generateImgAltText: false,
allowBackwardCrawling: x.allowBackwardLinks,
allowExternalContentLinks: x.allowExternalLinks,
};
}
export function legacyScrapeOptions(x: ScrapeOptions): PageOptions {
return {
includeMarkdown: x.formats.includes("markdown"),
includeHtml: x.formats.includes("html"),
includeRawHtml: x.formats.includes("rawHtml"),
onlyIncludeTags: x.includeTags,
removeTags: x.excludeTags,
onlyMainContent: x.onlyMainContent,
waitFor: x.waitFor,
includeLinks: x.formats.includes("links"),
screenshot: x.formats.includes("screenshot"),
fullPageScreenshot: x.formats.includes("screenshot@fullPage"),
parsePDF: x.parsePDF,
};
}
export function legacyDocumentConverter(doc: any): Document {
if (doc.metadata) {
if (doc.metadata.screenshot) {
doc.screenshot = doc.metadata.screenshot;
delete doc.metadata.screenshot;
}
if (doc.metadata.fullPageScreenshot) {
doc.fullPageScreenshot = doc.metadata.fullPageScreenshot;
delete doc.metadata.fullPageScreenshot;
}
}
return {
markdown: doc.markdown,
links: doc.linksOnPage,
rawHtml: doc.rawHtml,
html: doc.html,
screenshot: doc.screenshot ?? doc.fullPageScreenshot,
metadata: {
...doc.metadata,
pageError: undefined,
pageStatusCode: undefined,
error: doc.metadata.pageError,
statusCode: doc.metadata.pageStatusCode,
},
};
}
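A small sketch of how these schemas behave at the edges: the url preprocessor prepends http:// to bare domains, scrapeOptions fills in its defaults, and .strict() turns unknown keys into a ZodError (which the global error handler maps to a 400). The import path is assumed:
import { scrapeRequestSchema } from "./types"; // assumed import path
// Bare domain: rewritten to http://example.com, defaults filled in.
const ok = scrapeRequestSchema.parse({ url: "example.com" });
console.log(ok.url, ok.formats, ok.timeout); // "http://example.com" [ "markdown" ] 30000
// Unknown key: rejected by .strict(strictMessage).
const bad = scrapeRequestSchema.safeParse({ url: "example.com", foo: 1 });
console.log(bad.success); // false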

View File

@ -1,8 +1,10 @@
import express from "express"; import "dotenv/config";
import "./services/sentry"
import * as Sentry from "@sentry/node";
import express, { NextFunction, Request, Response } from "express";
import bodyParser from "body-parser"; import bodyParser from "body-parser";
import cors from "cors"; import cors from "cors";
import "dotenv/config"; import { getScrapeQueue } from "./services/queue-service";
import { getWebScraperQueue } from "./services/queue-service";
import { v0Router } from "./routes/v0"; import { v0Router } from "./routes/v0";
import { initSDK } from "@hyperdx/node-opentelemetry"; import { initSDK } from "@hyperdx/node-opentelemetry";
import cluster from "cluster"; import cluster from "cluster";
@ -13,6 +15,12 @@ import { ScrapeEvents } from "./lib/scrape-events";
import http from 'node:http'; import http from 'node:http';
import https from 'node:https'; import https from 'node:https';
import CacheableLookup from 'cacheable-lookup'; import CacheableLookup from 'cacheable-lookup';
import { v1Router } from "./routes/v1";
import expressWs from "express-ws";
import { crawlStatusWSController } from "./controllers/v1/crawl-status-ws";
import { ErrorResponse, ResponseWithSentry } from "./controllers/v1/types";
import { ZodError } from "zod";
import { v4 as uuidv4 } from "uuid";
const { createBullBoard } = require("@bull-board/api"); const { createBullBoard } = require("@bull-board/api");
const { BullAdapter } = require("@bull-board/api/bullAdapter"); const { BullAdapter } = require("@bull-board/api/bullAdapter");
@ -45,7 +53,8 @@ if (cluster.isMaster) {
} }
}); });
} else { } else {
const app = express(); const ws = expressWs(express());
const app = ws.app;
global.isProduction = process.env.IS_PRODUCTION === "true"; global.isProduction = process.env.IS_PRODUCTION === "true";
@ -58,7 +67,7 @@ if (cluster.isMaster) {
serverAdapter.setBasePath(`/admin/${process.env.BULL_AUTH_KEY}/queues`); serverAdapter.setBasePath(`/admin/${process.env.BULL_AUTH_KEY}/queues`);
const { addQueue, removeQueue, setQueues, replaceQueues } = createBullBoard({ const { addQueue, removeQueue, setQueues, replaceQueues } = createBullBoard({
queues: [new BullAdapter(getWebScraperQueue())], queues: [new BullAdapter(getScrapeQueue())],
serverAdapter: serverAdapter, serverAdapter: serverAdapter,
}); });
@ -78,6 +87,7 @@ if (cluster.isMaster) {
// register router // register router
app.use(v0Router); app.use(v0Router);
app.use("/v1", v1Router);
app.use(adminRouter); app.use(adminRouter);
const DEFAULT_PORT = process.env.PORT ?? 3002; const DEFAULT_PORT = process.env.PORT ?? 3002;
@ -104,9 +114,9 @@ if (cluster.isMaster) {
app.get(`/serverHealthCheck`, async (req, res) => { app.get(`/serverHealthCheck`, async (req, res) => {
try { try {
const webScraperQueue = getWebScraperQueue(); const scrapeQueue = getScrapeQueue();
const [waitingJobs] = await Promise.all([ const [waitingJobs] = await Promise.all([
webScraperQueue.getWaitingCount(), scrapeQueue.getWaitingCount(),
]); ]);
const noWaitingJobs = waitingJobs === 0; const noWaitingJobs = waitingJobs === 0;
@ -115,6 +125,7 @@ if (cluster.isMaster) {
waitingJobs, waitingJobs,
}); });
} catch (error) { } catch (error) {
Sentry.captureException(error);
Logger.error(error); Logger.error(error);
return res.status(500).json({ error: error.message }); return res.status(500).json({ error: error.message });
} }
@ -126,9 +137,9 @@ if (cluster.isMaster) {
const timeout = 60000; // 1 minute // The timeout value for the check in milliseconds const timeout = 60000; // 1 minute // The timeout value for the check in milliseconds
const getWaitingJobsCount = async () => { const getWaitingJobsCount = async () => {
const webScraperQueue = getWebScraperQueue(); const scrapeQueue = getScrapeQueue();
const [waitingJobsCount] = await Promise.all([ const [waitingJobsCount] = await Promise.all([
webScraperQueue.getWaitingCount(), scrapeQueue.getWaitingCount(),
]); ]);
return waitingJobsCount; return waitingJobsCount;
@ -166,6 +177,7 @@ if (cluster.isMaster) {
}, timeout); }, timeout);
} }
} catch (error) { } catch (error) {
Sentry.captureException(error);
Logger.debug(error); Logger.debug(error);
} }
}; };
@ -178,16 +190,42 @@ if (cluster.isMaster) {
res.send({ isProduction: global.isProduction }); res.send({ isProduction: global.isProduction });
}); });
Sentry.setupExpressErrorHandler(app);
app.use((err: unknown, req: Request<{}, ErrorResponse, undefined>, res: ResponseWithSentry<ErrorResponse>, next: NextFunction) => {
if (err instanceof ZodError) {
res.status(400).json({ success: false, error: "Bad Request", details: err.errors });
} else {
const id = res.sentry ?? uuidv4();
let verbose = JSON.stringify(err);
if (verbose === "{}") {
if (err instanceof Error) {
verbose = JSON.stringify({
message: err.message,
name: err.name,
stack: err.stack,
});
}
}
Logger.error("Error occurred in request! (" + req.path + ") -- ID " + id + " -- " + verbose);
res.status(500).json({ success: false, error: "An unexpected error occurred. Please contact hello@firecrawl.com for help. Your exception ID is " + id });
}
});
Logger.info(`Worker ${process.pid} started`); Logger.info(`Worker ${process.pid} started`);
} }
const wsq = getWebScraperQueue();
wsq.on("waiting", j => ScrapeEvents.logJobEvent(j, "waiting")); // const sq = getScrapeQueue();
wsq.on("active", j => ScrapeEvents.logJobEvent(j, "active"));
wsq.on("completed", j => ScrapeEvents.logJobEvent(j, "completed")); // sq.on("waiting", j => ScrapeEvents.logJobEvent(j, "waiting"));
wsq.on("paused", j => ScrapeEvents.logJobEvent(j, "paused")); // sq.on("active", j => ScrapeEvents.logJobEvent(j, "active"));
wsq.on("resumed", j => ScrapeEvents.logJobEvent(j, "resumed")); // sq.on("completed", j => ScrapeEvents.logJobEvent(j, "completed"));
wsq.on("removed", j => ScrapeEvents.logJobEvent(j, "removed")); // sq.on("paused", j => ScrapeEvents.logJobEvent(j, "paused"));
// sq.on("resumed", j => ScrapeEvents.logJobEvent(j, "resumed"));
// sq.on("removed", j => ScrapeEvents.logJobEvent(j, "removed"));

View File

@ -46,7 +46,7 @@ export async function generateCompletions(
return completionResult; return completionResult;
} catch (error) { } catch (error) {
Logger.error(`Error generating completions: ${error}`); Logger.error(`Error generating completions: ${error}`);
throw new Error(`Error generating completions: ${error.message}`); throw error;
} }
default: default:
throw new Error("Invalid client"); throw new Error("Invalid client");

View File

@ -15,7 +15,7 @@ const defaultPrompt =
function prepareOpenAIDoc( function prepareOpenAIDoc(
document: Document, document: Document,
mode: "markdown" | "raw-html" mode: "markdown" | "raw-html"
): [OpenAI.Chat.Completions.ChatCompletionContentPart[], number] { ): [OpenAI.Chat.Completions.ChatCompletionContentPart[], number] | null {
let markdown = document.markdown; let markdown = document.markdown;
@ -27,9 +27,10 @@ function prepareOpenAIDoc(
// Check if the markdown content exists in the document // Check if the markdown content exists in the document
if (!extractionTarget) { if (!extractionTarget) {
throw new Error( return null;
`${mode} content is missing in the document. This is likely due to an error in the scraping process. Please try again or reach out to help@mendable.ai` // throw new Error(
); // `${mode} content is missing in the document. This is likely due to an error in the scraping process. Please try again or reach out to help@mendable.ai`
// );
} }
@ -64,7 +65,16 @@ export async function generateOpenAICompletions({
mode: "markdown" | "raw-html"; mode: "markdown" | "raw-html";
}): Promise<Document> { }): Promise<Document> {
const openai = client as OpenAI; const openai = client as OpenAI;
const [content, numTokens] = prepareOpenAIDoc(document, mode); const preparedDoc = prepareOpenAIDoc(document, mode);
if (preparedDoc === null) {
return {
...document,
warning: "LLM extraction was not performed since the document's content is empty or missing.",
};
}
const [content, numTokens] = preparedDoc;
const completion = await openai.chat.completions.create({ const completion = await openai.chat.completions.create({
model, model,

View File

@ -0,0 +1,134 @@
import {
getJobPriority,
addJobPriority,
deleteJobPriority,
} from "../job-priority";
import { redisConnection } from "../../services/queue-service";
import { PlanType } from "../../types";
jest.mock("../../services/queue-service", () => ({
redisConnection: {
sadd: jest.fn(),
srem: jest.fn(),
scard: jest.fn(),
expire: jest.fn(),
},
}));
describe("Job Priority Tests", () => {
afterEach(() => {
jest.clearAllMocks();
});
test("addJobPriority should add job_id to the set and set expiration", async () => {
const team_id = "team1";
const job_id = "job1";
await addJobPriority(team_id, job_id);
expect(redisConnection.sadd).toHaveBeenCalledWith(
`limit_team_id:${team_id}`,
job_id
);
expect(redisConnection.expire).toHaveBeenCalledWith(
`limit_team_id:${team_id}`,
60
);
});
test("deleteJobPriority should remove job_id from the set", async () => {
const team_id = "team1";
const job_id = "job1";
await deleteJobPriority(team_id, job_id);
expect(redisConnection.srem).toHaveBeenCalledWith(
`limit_team_id:${team_id}`,
job_id
);
});
test("getJobPriority should return correct priority based on plan and set length", async () => {
const team_id = "team1";
const plan: PlanType = "standard";
(redisConnection.scard as jest.Mock).mockResolvedValue(150);
const priority = await getJobPriority({ plan, team_id });
expect(priority).toBe(10);
(redisConnection.scard as jest.Mock).mockResolvedValue(250);
const priorityExceeded = await getJobPriority({ plan, team_id });
expect(priorityExceeded).toBe(20); // basePriority + Math.ceil((250 - 200) * 0.4)
});
test("getJobPriority should handle different plans correctly", async () => {
const team_id = "team1";
(redisConnection.scard as jest.Mock).mockResolvedValue(50);
let plan: PlanType = "hobby";
let priority = await getJobPriority({ plan, team_id });
expect(priority).toBe(10);
(redisConnection.scard as jest.Mock).mockResolvedValue(150);
plan = "hobby";
priority = await getJobPriority({ plan, team_id });
expect(priority).toBe(25); // basePriority + Math.ceil((150 - 50) * 0.3)
(redisConnection.scard as jest.Mock).mockResolvedValue(25);
plan = "free";
priority = await getJobPriority({ plan, team_id });
expect(priority).toBe(10);
(redisConnection.scard as jest.Mock).mockResolvedValue(60);
plan = "free";
priority = await getJobPriority({ plan, team_id });
expect(priority).toBe(28); // basePriority + Math.ceil((60 - 25) * 0.5)
});
test("addJobPriority should reset expiration time when adding new job", async () => {
const team_id = "team1";
const job_id1 = "job1";
const job_id2 = "job2";
await addJobPriority(team_id, job_id1);
expect(redisConnection.expire).toHaveBeenCalledWith(
`limit_team_id:${team_id}`,
60
);
// Clear the mock calls
(redisConnection.expire as jest.Mock).mockClear();
// Add another job
await addJobPriority(team_id, job_id2);
expect(redisConnection.expire).toHaveBeenCalledWith(
`limit_team_id:${team_id}`,
60
);
});
test("Set should expire after 60 seconds", async () => {
const team_id = "team1";
const job_id = "job1";
jest.useFakeTimers();
await addJobPriority(team_id, job_id);
expect(redisConnection.expire).toHaveBeenCalledWith(
`limit_team_id:${team_id}`,
60
);
// Fast-forward time by 59 seconds
jest.advanceTimersByTime(59000);
// The set should still exist
expect(redisConnection.scard).not.toHaveBeenCalled();
// Fast-forward time by 2 more seconds (total 61 seconds)
jest.advanceTimersByTime(2000);
// Check if the set has been removed (scard should return 0)
(redisConnection.scard as jest.Mock).mockResolvedValue(0);
const setSize = await redisConnection.scard(`limit_team_id:${team_id}`);
expect(setSize).toBe(0);
jest.useRealTimers();
});
});

View File

@ -0,0 +1,32 @@
import { checkTeamCredits } from "../services/billing/credit_billing";
import { Logger } from "./logger";
type checkCreditsResponse = {
status: number;
error: string | null;
}
export const checkCredits = async (team_id: string): Promise<checkCreditsResponse> => {
try {
const {
success: creditsCheckSuccess,
message: creditsCheckMessage
} = await checkTeamCredits(team_id, 1);
if (!creditsCheckSuccess) {
return {
status: 402,
error: "Insufficient credits"
};
}
} catch (error) {
Logger.error(error);
return {
status: 500,
error: "Error checking team credits. Please contact hello@firecrawl.com for help."
};
}
return {
status: 200,
error: null
}
};
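A usage sketch for the helper above; the handler and import path are hypothetical, only the { status, error } contract comes from the code:
import { Response } from "express";
import { checkCredits } from "../lib/check-credits"; // assumed path
// Hypothetical handler: bail out before doing billable work.
async function billableHandler(team_id: string, res: Response) {
  const { status, error } = await checkCredits(team_id);
  if (error) {
    return res.status(status).json({ success: false, error });
  }
  // ... proceed with the billable operation ...
  return res.status(200).json({ success: true });
}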

View File

@ -0,0 +1,124 @@
import { WebCrawler } from "../scraper/WebScraper/crawler";
import { redisConnection } from "../services/queue-service";
export type StoredCrawl = {
originUrl: string;
crawlerOptions: any;
pageOptions: any;
team_id: string;
plan: string;
robots?: string;
cancelled?: boolean;
createdAt: number;
};
export async function saveCrawl(id: string, crawl: StoredCrawl) {
await redisConnection.set("crawl:" + id, JSON.stringify(crawl));
await redisConnection.expire("crawl:" + id, 24 * 60 * 60, "NX");
}
export async function getCrawl(id: string): Promise<StoredCrawl | null> {
const x = await redisConnection.get("crawl:" + id);
if (x === null) {
return null;
}
return JSON.parse(x);
}
export async function getCrawlExpiry(id: string): Promise<Date> {
const d = new Date();
const ttl = await redisConnection.pttl("crawl:" + id);
d.setMilliseconds(d.getMilliseconds() + ttl);
d.setMilliseconds(0);
return d;
}
export async function addCrawlJob(id: string, job_id: string) {
await redisConnection.sadd("crawl:" + id + ":jobs", job_id);
await redisConnection.expire("crawl:" + id + ":jobs", 24 * 60 * 60, "NX");
}
export async function addCrawlJobs(id: string, job_ids: string[]) {
await redisConnection.sadd("crawl:" + id + ":jobs", ...job_ids);
await redisConnection.expire("crawl:" + id + ":jobs", 24 * 60 * 60, "NX");
}
export async function addCrawlJobDone(id: string, job_id: string) {
await redisConnection.sadd("crawl:" + id + ":jobs_done", job_id);
await redisConnection.lpush("crawl:" + id + ":jobs_done_ordered", job_id);
await redisConnection.expire("crawl:" + id + ":jobs_done", 24 * 60 * 60, "NX");
await redisConnection.expire("crawl:" + id + ":jobs_done_ordered", 24 * 60 * 60, "NX");
}
export async function getDoneJobsOrderedLength(id: string): Promise<number> {
return await redisConnection.llen("crawl:" + id + ":jobs_done_ordered");
}
export async function getDoneJobsOrdered(id: string, start = 0, end = -1): Promise<string[]> {
return await redisConnection.lrange("crawl:" + id + ":jobs_done_ordered", start, end);
}
export async function isCrawlFinished(id: string) {
return (await redisConnection.scard("crawl:" + id + ":jobs_done")) === (await redisConnection.scard("crawl:" + id + ":jobs"));
}
export async function isCrawlFinishedLocked(id: string) {
return (await redisConnection.exists("crawl:" + id + ":finish"));
}
export async function finishCrawl(id: string) {
if (await isCrawlFinished(id)) {
const set = await redisConnection.setnx("crawl:" + id + ":finish", "yes");
if (set === 1) {
await redisConnection.expire("crawl:" + id + ":finish", 24 * 60 * 60);
}
return set === 1
}
}
export async function getCrawlJobs(id: string): Promise<string[]> {
return await redisConnection.smembers("crawl:" + id + ":jobs");
}
export async function lockURL(id: string, sc: StoredCrawl, url: string): Promise<boolean> {
if (typeof sc.crawlerOptions?.limit === "number") {
if (await redisConnection.scard("crawl:" + id + ":visited") >= sc.crawlerOptions.limit) {
return false;
}
}
const res = (await redisConnection.sadd("crawl:" + id + ":visited", url)) !== 0
await redisConnection.expire("crawl:" + id + ":visited", 24 * 60 * 60, "NX");
return res;
}
/// NOTE: does not check the crawl limit. Only use when the limit was checked beforehand, e.g. for sitemap URLs.
export async function lockURLs(id: string, urls: string[]): Promise<boolean> {
const res = (await redisConnection.sadd("crawl:" + id + ":visited", ...urls)) !== 0
await redisConnection.expire("crawl:" + id + ":visited", 24 * 60 * 60, "NX");
return res;
}
export function crawlToCrawler(id: string, sc: StoredCrawl): WebCrawler {
const crawler = new WebCrawler({
jobId: id,
initialUrl: sc.originUrl,
includes: sc.crawlerOptions?.includes ?? [],
excludes: sc.crawlerOptions?.excludes ?? [],
maxCrawledLinks: sc.crawlerOptions?.maxCrawledLinks ?? 1000,
maxCrawledDepth: sc.crawlerOptions?.maxDepth ?? 10,
limit: sc.crawlerOptions?.limit ?? 10000,
generateImgAltText: sc.crawlerOptions?.generateImgAltText ?? false,
allowBackwardCrawling: sc.crawlerOptions?.allowBackwardCrawling ?? false,
allowExternalContentLinks: sc.crawlerOptions?.allowExternalContentLinks ?? false,
});
if (sc.robots !== undefined) {
try {
crawler.importRobotsTxt(sc.robots);
} catch (_) {}
}
return crawler;
}
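For orientation, the per-crawl Redis keys used above and a minimal lifecycle sketch; every key carries a 24-hour TTL, and the ids, URL and StoredCrawl values below are illustrative:
// Key layout per crawl id:
//   crawl:<id>                    JSON-serialized StoredCrawl
//   crawl:<id>:jobs               set of every scrape job id in the crawl
//   crawl:<id>:jobs_done          set of finished job ids
//   crawl:<id>:jobs_done_ordered  list of finished job ids in completion order
//   crawl:<id>:visited            set of URLs already locked/claimed
//   crawl:<id>:finish             SETNX marker so finishCrawl fires only once
const sc: StoredCrawl = {
  originUrl: "https://example.com",
  crawlerOptions: { limit: 100 },
  pageOptions: {},
  team_id: "team-1",
  plan: "hobby",
  createdAt: Date.now(),
};
await saveCrawl("crawl-1", sc);
if (await lockURL("crawl-1", sc, "https://example.com/")) {
  await addCrawlJob("crawl-1", "job-1");
}
// ... a worker finishes job-1 ...
await addCrawlJobDone("crawl-1", "job-1");
if (await isCrawlFinished("crawl-1")) {
  await finishCrawl("crawl-1"); // true only for the first caller
}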

View File

@ -1,6 +1,6 @@
export const defaultOrigin = "api"; export const defaultOrigin = "api";
export const defaultTimeout = 45000; // 45 seconds export const defaultTimeout = 60000; // 60 seconds
export const defaultPageOptions = { export const defaultPageOptions = {
onlyMainContent: false, onlyMainContent: false,
@ -12,7 +12,8 @@ export const defaultPageOptions = {
}; };
export const defaultCrawlerOptions = { export const defaultCrawlerOptions = {
allowBackwardCrawling: false allowBackwardCrawling: false,
limit: 10000
} }
export const defaultCrawlPageOptions = { export const defaultCrawlPageOptions = {

View File

@ -11,6 +11,7 @@ export interface Progress {
} }
export type PageOptions = { export type PageOptions = {
includeMarkdown?: boolean;
onlyMainContent?: boolean; onlyMainContent?: boolean;
includeHtml?: boolean; includeHtml?: boolean;
includeRawHtml?: boolean; includeRawHtml?: boolean;
@ -24,6 +25,10 @@ export type PageOptions = {
parsePDF?: boolean; parsePDF?: boolean;
removeTags?: string | string[]; removeTags?: string | string[];
onlyIncludeTags?: string | string[]; onlyIncludeTags?: string | string[];
includeLinks?: boolean;
useFastMode?: boolean; // beta
disableJSDom?: boolean; // beta
atsv?: boolean; // beta
}; };
export type ExtractorOptions = { export type ExtractorOptions = {
@ -65,6 +70,8 @@ export type WebScraperOptions = {
extractorOptions?: ExtractorOptions; extractorOptions?: ExtractorOptions;
concurrentRequests?: number; concurrentRequests?: number;
bullJobId?: string; bullJobId?: string;
priority?: number;
teamId?: string;
}; };
export interface DocumentUrl { export interface DocumentUrl {
@ -141,4 +148,5 @@ export interface FireEngineOptions{
blockMedia?: boolean; blockMedia?: boolean;
blockAds?: boolean; blockAds?: boolean;
disableJsDom?: boolean; disableJsDom?: boolean;
atsv?: boolean; // beta
} }

View File

@ -1,5 +1,5 @@
export function parseMarkdown(html: string) { export async function parseMarkdown(html: string) {
var TurndownService = require("turndown"); var TurndownService = require("turndown");
var turndownPluginGfm = require('joplin-turndown-plugin-gfm') var turndownPluginGfm = require('joplin-turndown-plugin-gfm')
@ -21,7 +21,27 @@ export function parseMarkdown(html: string) {
}); });
var gfm = turndownPluginGfm.gfm; var gfm = turndownPluginGfm.gfm;
turndownService.use(gfm); turndownService.use(gfm);
let markdownContent = turndownService.turndown(html); let markdownContent = "";
const turndownPromise = new Promise<string>((resolve, reject) => {
try {
const result = turndownService.turndown(html);
resolve(result);
} catch (error) {
reject("Error converting HTML to Markdown: " + error);
}
});
const timeoutPromise = new Promise<string>((resolve, reject) => {
const timeout = 5000; // Timeout in milliseconds
setTimeout(() => reject("Conversion timed out after " + timeout + "ms"), timeout);
});
try {
markdownContent = await Promise.race([turndownPromise, timeoutPromise]);
} catch (error) {
console.error(error);
return ""; // Optionally return an empty string or handle the error as needed
}
// multiple line links // multiple line links
let insideLinkContent = false; let insideLinkContent = false;

View File

@ -0,0 +1,91 @@
import { redisConnection } from "../../src/services/queue-service";
import { PlanType } from "../../src/types";
import { Logger } from "./logger";
const SET_KEY_PREFIX = "limit_team_id:";
export async function addJobPriority(team_id: string, job_id: string) {
try {
const setKey = SET_KEY_PREFIX + team_id;
// Add scrape job id to the set
await redisConnection.sadd(setKey, job_id);
// This approach will reset the expiration time to 60 seconds every time a new job is added to the set.
await redisConnection.expire(setKey, 60);
} catch (e) {
Logger.error(`Add job priority (sadd) failed: ${team_id}, ${job_id}`);
}
}
export async function deleteJobPriority(team_id: string, job_id: string) {
try {
const setKey = SET_KEY_PREFIX + team_id;
// remove job_id from the set
await redisConnection.srem(setKey, job_id);
} catch (e) {
Logger.error(`Delete job priority (srem) failed: ${team_id}, ${job_id}`);
}
}
export async function getJobPriority({
plan,
team_id,
basePriority = 10,
}: {
plan: PlanType;
team_id: string;
basePriority?: number;
}): Promise<number> {
try {
const setKey = SET_KEY_PREFIX + team_id;
// Get the length of the set
const setLength = await redisConnection.scard(setKey);
// Determine the priority based on the plan and set length
let planModifier = 1;
let bucketLimit = 0;
switch (plan) {
case "free":
bucketLimit = 25;
planModifier = 0.5;
break;
case "hobby":
bucketLimit = 100;
planModifier = 0.3;
break;
case "standard":
case "standardnew":
bucketLimit = 200;
planModifier = 0.2;
break;
case "growth":
case "growthdouble":
bucketLimit = 400;
planModifier = 0.1;
break;
default:
bucketLimit = 25;
planModifier = 1;
break;
}
// if the set length is within the bucket limit, just return the base priority
if (setLength <= bucketLimit) {
return basePriority;
} else {
// Otherwise, add the overflow above the bucket limit, scaled by the plan modifier, to the base priority
return Math.ceil(
basePriority + Math.ceil((setLength - bucketLimit) * planModifier)
);
}
} catch (e) {
Logger.error(
`Get job priority failed: ${team_id}, ${plan}, ${basePriority}`
);
return basePriority;
}
}
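A worked example of the formula, matching the "standard" case in the unit tests earlier in this diff; BullMQ treats a larger priority value as lower scheduling priority:
// setLength = 250, bucketLimit = 200, planModifier = 0.2, basePriority = 10
// priority  = 10 + Math.ceil((250 - 200) * 0.2) = 10 + 10 = 20
const priority = await getJobPriority({ plan: "standard", team_id: "team1", basePriority: 10 });
// => 20 while the team's limit_team_id set holds 250 job ids
// The v1 crawl controller passes basePriority 21 for sitemaps with more than
// 1000 URLs, so the same team there would get 21 + 10 = 31.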

View File

@ -0,0 +1,46 @@
import { Logger } from "./logger";
export function performCosineSimilarity(links: string[], searchQuery: string) {
try {
// Function to calculate cosine similarity
const cosineSimilarity = (vec1: number[], vec2: number[]): number => {
const dotProduct = vec1.reduce((sum, val, i) => sum + val * vec2[i], 0);
const magnitude1 = Math.sqrt(
vec1.reduce((sum, val) => sum + val * val, 0)
);
const magnitude2 = Math.sqrt(
vec2.reduce((sum, val) => sum + val * val, 0)
);
if (magnitude1 === 0 || magnitude2 === 0) return 0;
return dotProduct / (magnitude1 * magnitude2);
};
// Project a text onto the query's word counts (bag-of-words over the search terms)
const textToVector = (text: string): number[] => {
const words = searchQuery.toLowerCase().split(/\W+/);
return words.map((word) => {
const count = (text.toLowerCase().match(new RegExp(word, "g")) || [])
.length;
return count / text.length;
});
};
// Calculate similarity scores
const similarityScores = links.map((link) => {
const linkVector = textToVector(link);
const searchVector = textToVector(searchQuery);
return cosineSimilarity(linkVector, searchVector);
});
// Sort links by similarity score in descending order
const a = links
.map((link, index) => ({ link, score: similarityScores[index] }))
.sort((a, b) => b.score - a.score);
links = a.map((item) => item.link);
return links;
} catch (error) {
Logger.error(`Error performing cosine similarity: ${error}`);
return links;
}
}
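A small usage sketch: each link is scored by how often the query words occur in it, so URLs containing the search terms are sorted to the front (import path and URLs are illustrative):
import { performCosineSimilarity } from "./map-cosine"; // assumed import path
const links = [
  "https://example.com/pricing",
  "https://example.com/docs/getting-started",
  "https://example.com/blog/announcing-docs",
];
const ranked = performCosineSimilarity(links, "docs");
// Links containing "docs" score highest, e.g.:
// [ ".../docs/getting-started", ".../blog/announcing-docs", ".../pricing" ]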

View File

@ -1,4 +1,4 @@
import { Job, JobId } from "bull"; import { Job } from "bullmq";
import type { baseScrapers } from "../scraper/WebScraper/single_url"; import type { baseScrapers } from "../scraper/WebScraper/single_url";
import { supabase_service as supabase } from "../services/supabase"; import { supabase_service as supabase } from "../services/supabase";
import { Logger } from "./logger"; import { Logger } from "./logger";
@ -70,7 +70,7 @@ export class ScrapeEvents {
} }
} }
static async logJobEvent(job: Job | JobId, event: ScrapeQueueEvent["event"]) { static async logJobEvent(job: Job | any, event: ScrapeQueueEvent["event"]) {
try { try {
await this.insert(((job as any).id ? (job as any).id : job) as string, { await this.insert(((job as any).id ? (job as any).id : job) as string, {
type: "queue", type: "queue",

View File

@ -17,3 +17,21 @@ export const supabaseGetJobById = async (jobId: string) => {
return data; return data;
} }
export const supabaseGetJobsById = async (jobIds: string[]) => {
const { data, error } = await supabase_service
.from('firecrawl_jobs')
.select('*')
.in('job_id', jobIds);
if (error) {
return [];
}
if (!data) {
return [];
}
return data;
}

View File

@ -0,0 +1,159 @@
import { isSameDomain, removeDuplicateUrls } from "./validateUrl";
import { isSameSubdomain } from "./validateUrl";
describe("isSameDomain", () => {
it("should return true for a subdomain", () => {
const result = isSameDomain("http://sub.example.com", "http://example.com");
expect(result).toBe(true);
});
it("should return true for the same domain", () => {
const result = isSameDomain("http://example.com", "http://example.com");
expect(result).toBe(true);
});
it("should return false for different domains", () => {
const result = isSameDomain("http://example.com", "http://another.com");
expect(result).toBe(false);
});
it("should return true for a subdomain with different protocols", () => {
const result = isSameDomain("https://sub.example.com", "http://example.com");
expect(result).toBe(true);
});
it("should return false for invalid URLs", () => {
const result = isSameDomain("invalid-url", "http://example.com");
expect(result).toBe(false);
const result2 = isSameDomain("http://example.com", "invalid-url");
expect(result2).toBe(false);
});
it("should return true for a subdomain with www prefix", () => {
const result = isSameDomain("http://www.sub.example.com", "http://example.com");
expect(result).toBe(true);
});
it("should return true for the same domain with www prefix", () => {
const result = isSameDomain("http://docs.s.s.example.com", "http://example.com");
expect(result).toBe(true);
});
});
describe("isSameSubdomain", () => {
it("should return false for a subdomain", () => {
const result = isSameSubdomain("http://example.com", "http://docs.example.com");
expect(result).toBe(false);
});
it("should return true for the same subdomain", () => {
const result = isSameSubdomain("http://docs.example.com", "http://docs.example.com");
expect(result).toBe(true);
});
it("should return false for different subdomains", () => {
const result = isSameSubdomain("http://docs.example.com", "http://blog.example.com");
expect(result).toBe(false);
});
it("should return false for different domains", () => {
const result = isSameSubdomain("http://example.com", "http://another.com");
expect(result).toBe(false);
});
it("should return false for invalid URLs", () => {
const result = isSameSubdomain("invalid-url", "http://example.com");
expect(result).toBe(false);
const result2 = isSameSubdomain("http://example.com", "invalid-url");
expect(result2).toBe(false);
});
it("should return true for the same subdomain with different protocols", () => {
const result = isSameSubdomain("https://docs.example.com", "http://docs.example.com");
expect(result).toBe(true);
});
it("should return true for the same subdomain with www prefix", () => {
const result = isSameSubdomain("http://www.docs.example.com", "http://docs.example.com");
expect(result).toBe(true);
});
it("should return false for a subdomain with www prefix and different subdomain", () => {
const result = isSameSubdomain("http://www.docs.example.com", "http://blog.example.com");
expect(result).toBe(false);
});
});
describe("removeDuplicateUrls", () => {
it("should remove duplicate URLs with different protocols", () => {
const urls = [
"http://example.com",
"https://example.com",
"http://www.example.com",
"https://www.example.com"
];
const result = removeDuplicateUrls(urls);
expect(result).toEqual(["https://example.com"]);
});
it("should keep URLs with different paths", () => {
const urls = [
"https://example.com/page1",
"https://example.com/page2",
"https://example.com/page1?param=1",
"https://example.com/page1#section1"
];
const result = removeDuplicateUrls(urls);
expect(result).toEqual([
"https://example.com/page1",
"https://example.com/page2",
"https://example.com/page1?param=1",
"https://example.com/page1#section1"
]);
});
it("should prefer https over http", () => {
const urls = [
"http://example.com",
"https://example.com"
];
const result = removeDuplicateUrls(urls);
expect(result).toEqual(["https://example.com"]);
});
it("should prefer non-www over www", () => {
const urls = [
"https://www.example.com",
"https://example.com"
];
const result = removeDuplicateUrls(urls);
expect(result).toEqual(["https://example.com"]);
});
it("should handle empty input", () => {
const urls: string[] = [];
const result = removeDuplicateUrls(urls);
expect(result).toEqual([]);
});
it("should handle URLs with different cases", () => {
const urls = [
"https://EXAMPLE.com",
"https://example.com"
];
const result = removeDuplicateUrls(urls);
expect(result).toEqual(["https://EXAMPLE.com"]);
});
it("should handle URLs with trailing slashes", () => {
const urls = [
"https://example.com",
"https://example.com/"
];
const result = removeDuplicateUrls(urls);
expect(result).toEqual(["https://example.com"]);
});
});

View File

@ -0,0 +1,170 @@
export const protocolIncluded = (url: string) => {
// if the url does not start with a protocol, assume http (maybe https?)
// the regex checks whether "://" appears before the first "."
return /^([^.:]+:\/\/)/.test(url);
};
const getURLobj = (s: string) => {
// new URL() throws if the protocol is missing, e.g. "google.com"
let error = false;
let urlObj = {};
try {
urlObj = new URL(s);
} catch (err) {
error = true;
}
return { error, urlObj };
};
export const checkAndUpdateURL = (url: string) => {
if (!protocolIncluded(url)) {
url = `http://${url}`;
}
const { error, urlObj } = getURLobj(url);
if (error) {
throw new Error("Invalid URL");
}
const typedUrlObj = urlObj as URL;
if (typedUrlObj.protocol !== "http:" && typedUrlObj.protocol !== "https:") {
throw new Error("Invalid URL");
}
return { urlObj: typedUrlObj, url: url };
};
export const checkUrl = (url: string) => {
const { error, urlObj } = getURLobj(url);
if (error) {
throw new Error("Invalid URL");
}
const typedUrlObj = urlObj as URL;
if (typedUrlObj.protocol !== "http:" && typedUrlObj.protocol !== "https:") {
throw new Error("Invalid URL");
}
if ((url.split(".")[0].match(/:/g) || []).length !== 1) {
throw new Error("Invalid URL. Invalid protocol."); // for this one: http://http://example.com
}
return url;
};
/**
* Same domain check
* It checks if the domain of the url is the same as the base url
* It also returns true for subdomains and www-prefixed subdomains
* @param url
* @param baseUrl
* @returns
*/
export function isSameDomain(url: string, baseUrl: string) {
const { urlObj: urlObj1, error: error1 } = getURLobj(url);
const { urlObj: urlObj2, error: error2 } = getURLobj(baseUrl);
if (error1 || error2) {
return false;
}
const typedUrlObj1 = urlObj1 as URL;
const typedUrlObj2 = urlObj2 as URL;
const cleanHostname = (hostname: string) => {
return hostname.startsWith('www.') ? hostname.slice(4) : hostname;
};
const domain1 = cleanHostname(typedUrlObj1.hostname).split('.').slice(-2).join('.');
const domain2 = cleanHostname(typedUrlObj2.hostname).split('.').slice(-2).join('.');
return domain1 === domain2;
}
export function isSameSubdomain(url: string, baseUrl: string) {
const { urlObj: urlObj1, error: error1 } = getURLobj(url);
const { urlObj: urlObj2, error: error2 } = getURLobj(baseUrl);
if (error1 || error2) {
return false;
}
const typedUrlObj1 = urlObj1 as URL;
const typedUrlObj2 = urlObj2 as URL;
const cleanHostname = (hostname: string) => {
return hostname.startsWith('www.') ? hostname.slice(4) : hostname;
};
const domain1 = cleanHostname(typedUrlObj1.hostname).split('.').slice(-2).join('.');
const domain2 = cleanHostname(typedUrlObj2.hostname).split('.').slice(-2).join('.');
const subdomain1 = cleanHostname(typedUrlObj1.hostname).split('.').slice(0, -2).join('.');
const subdomain2 = cleanHostname(typedUrlObj2.hostname).split('.').slice(0, -2).join('.');
// Check if the domains are the same and the subdomains are the same
return domain1 === domain2 && subdomain1 === subdomain2;
}
export const checkAndUpdateURLForMap = (url: string) => {
if (!protocolIncluded(url)) {
url = `http://${url}`;
}
// remove last slash if present
if (url.endsWith("/")) {
url = url.slice(0, -1);
}
const { error, urlObj } = getURLobj(url);
if (error) {
throw new Error("Invalid URL");
}
const typedUrlObj = urlObj as URL;
if (typedUrlObj.protocol !== "http:" && typedUrlObj.protocol !== "https:") {
throw new Error("Invalid URL");
}
// remove any query params
url = url.split("?")[0].trim();
return { urlObj: typedUrlObj, url: url };
};
export function removeDuplicateUrls(urls: string[]): string[] {
const urlMap = new Map<string, string>();
for (const url of urls) {
const parsedUrl = new URL(url);
const protocol = parsedUrl.protocol;
const hostname = parsedUrl.hostname.replace(/^www\./, '');
const path = parsedUrl.pathname + parsedUrl.search + parsedUrl.hash;
const key = `${hostname}${path}`;
if (!urlMap.has(key)) {
urlMap.set(key, url);
} else {
const existingUrl = new URL(urlMap.get(key)!);
const existingProtocol = existingUrl.protocol;
if (protocol === 'https:' && existingProtocol === 'http:') {
urlMap.set(key, url);
} else if (protocol === existingProtocol && !parsedUrl.hostname.startsWith('www.') && existingUrl.hostname.startsWith('www.')) {
urlMap.set(key, url);
}
}
}
return [...new Set(Array.from(urlMap.values()))];
}
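A short sketch of checkAndUpdateURLForMap, which the map controller runs on every link: it adds a protocol when missing, strips a trailing slash and any query string, and rejects non-http(s) schemes (inputs are illustrative):
import { checkAndUpdateURLForMap } from "./validateUrl"; // assumed relative path
const a = checkAndUpdateURLForMap("example.com/docs/").url;         // "http://example.com/docs"
const b = checkAndUpdateURLForMap("https://example.com/a?x=1").url; // "https://example.com/a"
// checkAndUpdateURLForMap("ftp://example.com") throws Error("Invalid URL")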

View File

@ -1,4 +1,4 @@
import { Job } from "bull"; import { Job } from "bullmq";
import { import {
CrawlResult, CrawlResult,
WebScraperOptions, WebScraperOptions,
@ -15,15 +15,23 @@ import { ScrapeEvents } from "../lib/scrape-events";
export async function startWebScraperPipeline({ export async function startWebScraperPipeline({
job, job,
token,
}: { }: {
job: Job<WebScraperOptions>; job: Job<WebScraperOptions>;
token: string;
}) { }) {
let partialDocs: Document[] = []; let partialDocs: Document[] = [];
return (await runWebScraper({ return (await runWebScraper({
url: job.data.url, url: job.data.url,
mode: job.data.mode, mode: job.data.mode,
crawlerOptions: job.data.crawlerOptions, crawlerOptions: job.data.crawlerOptions,
pageOptions: job.data.pageOptions, extractorOptions: job.data.extractorOptions,
pageOptions: {
...job.data.pageOptions,
...(job.data.crawl_id ? ({
includeRawHtml: true,
}): {}),
},
inProgress: (progress) => { inProgress: (progress) => {
Logger.debug(`🐂 Job in progress ${job.id}`); Logger.debug(`🐂 Job in progress ${job.id}`);
if (progress.currentDocument) { if (progress.currentDocument) {
@ -31,20 +39,22 @@ export async function startWebScraperPipeline({
if (partialDocs.length > 50) { if (partialDocs.length > 50) {
partialDocs = partialDocs.slice(-50); partialDocs = partialDocs.slice(-50);
} }
job.progress({ ...progress, partialDocs: partialDocs }); // job.updateProgress({ ...progress, partialDocs: partialDocs });
} }
}, },
onSuccess: (result) => { onSuccess: (result, mode) => {
Logger.debug(`🐂 Job completed ${job.id}`); Logger.debug(`🐂 Job completed ${job.id}`);
saveJob(job, result); saveJob(job, result, token, mode);
}, },
onError: (error) => { onError: (error) => {
Logger.error(`🐂 Job failed ${job.id}`); Logger.error(`🐂 Job failed ${job.id}`);
ScrapeEvents.logJobEvent(job, "failed"); ScrapeEvents.logJobEvent(job, "failed");
job.moveToFailed(error); job.moveToFailed(error, token, false);
}, },
team_id: job.data.team_id, team_id: job.data.team_id,
bull_job_id: job.id.toString(), bull_job_id: job.id.toString(),
priority: job.opts.priority,
is_scrape: job.data.is_scrape ?? false,
})) as { success: boolean; message: string; docs: Document[] }; })) as { success: boolean; message: string; docs: Document[] };
} }
export async function runWebScraper({ export async function runWebScraper({
@ -52,11 +62,14 @@ export async function runWebScraper({
mode, mode,
crawlerOptions, crawlerOptions,
pageOptions, pageOptions,
extractorOptions,
inProgress, inProgress,
onSuccess, onSuccess,
onError, onError,
team_id, team_id,
bull_job_id, bull_job_id,
priority,
is_scrape=false,
}: RunWebScraperParams): Promise<RunWebScraperResult> { }: RunWebScraperParams): Promise<RunWebScraperResult> {
try { try {
const provider = new WebScraperDataProvider(); const provider = new WebScraperDataProvider();
@ -65,17 +78,22 @@ export async function runWebScraper({
jobId: bull_job_id, jobId: bull_job_id,
mode: mode, mode: mode,
urls: [url], urls: [url],
extractorOptions,
crawlerOptions: crawlerOptions, crawlerOptions: crawlerOptions,
pageOptions: pageOptions, pageOptions: pageOptions,
bullJobId: bull_job_id, bullJobId: bull_job_id,
priority,
}); });
} else { } else {
await provider.setOptions({ await provider.setOptions({
jobId: bull_job_id, jobId: bull_job_id,
mode: mode, mode: mode,
urls: url.split(","), urls: url.split(","),
extractorOptions,
crawlerOptions: crawlerOptions, crawlerOptions: crawlerOptions,
pageOptions: pageOptions, pageOptions: pageOptions,
priority,
teamId: team_id
}); });
} }
const docs = (await provider.getDocuments(false, (progress: Progress) => { const docs = (await provider.getDocuments(false, (progress: Progress) => {
@ -97,21 +115,24 @@ export async function runWebScraper({
return { url: doc.metadata.sourceURL }; return { url: doc.metadata.sourceURL };
} }
}) })
: docs.filter((doc) => doc.content.trim().length > 0); : docs;
const billingResult = await billTeam(team_id, filteredDocs.length); if(is_scrape === false) {
const billingResult = await billTeam(team_id, filteredDocs.length);
if (!billingResult.success) { if (!billingResult.success) {
// throw new Error("Failed to bill team, no subscription was found"); // throw new Error("Failed to bill team, no subscription was found");
return { return {
success: false, success: false,
message: "Failed to bill team, no subscription was found", message: "Failed to bill team, no subscription was found",
docs: [], docs: [],
}; };
}
} }
// This is where the returnvalue from the job is set // This is where the returnvalue from the job is set
onSuccess(filteredDocs); onSuccess(filteredDocs, mode);
// this return doesn't matter too much for the job completion result // this return doesn't matter too much for the job completion result
return { success: true, message: "", docs: filteredDocs }; return { success: true, message: "", docs: filteredDocs };
@ -121,7 +142,7 @@ export async function runWebScraper({
} }
} }
const saveJob = async (job: Job, result: any) => { const saveJob = async (job: Job, result: any, token: string, mode: string) => {
try { try {
if (process.env.USE_DB_AUTHENTICATION === "true") { if (process.env.USE_DB_AUTHENTICATION === "true") {
const { data, error } = await supabase_service const { data, error } = await supabase_service
@ -130,17 +151,21 @@ const saveJob = async (job: Job, result: any) => {
.eq("job_id", job.id); .eq("job_id", job.id);
if (error) throw new Error(error.message); if (error) throw new Error(error.message);
try { // try {
await job.moveToCompleted(null, false, false); // if (mode === "crawl") {
} catch (error) { // await job.moveToCompleted(null, token, false);
// I think the job won't exist here anymore // } else {
} // await job.moveToCompleted(result, token, false);
} else { // }
try { // } catch (error) {
await job.moveToCompleted(result, false, false); // // I think the job won't exist here anymore
} catch (error) { // }
// I think the job won't exist here anymore // } else {
} // try {
// await job.moveToCompleted(result, token, false);
// } catch (error) {
// // I think the job won't exist here anymore
// }
} }
ScrapeEvents.logJobEvent(job, "completed"); ScrapeEvents.logJobEvent(job, "completed");
} catch (error) { } catch (error) {

View File

@ -1,10 +1,11 @@
import express from "express"; import express from "express";
import { redisHealthController } from "../controllers/admin/redis-health"; import { redisHealthController } from "../controllers/v0/admin/redis-health";
import { import {
autoscalerController,
checkQueuesController, checkQueuesController,
cleanBefore24hCompleteJobsController, cleanBefore24hCompleteJobsController,
queuesController, queuesController,
} from "../controllers/admin/queue"; } from "../controllers/v0/admin/queue";
export const adminRouter = express.Router(); export const adminRouter = express.Router();
@ -27,3 +28,8 @@ adminRouter.get(
`/admin/${process.env.BULL_AUTH_KEY}/queues`, `/admin/${process.env.BULL_AUTH_KEY}/queues`,
queuesController queuesController
); );
adminRouter.get(
`/admin/${process.env.BULL_AUTH_KEY}/autoscaler`,
autoscalerController
);

View File

@ -1,14 +1,14 @@
import express from "express"; import express from "express";
import { crawlController } from "../../src/controllers/crawl"; import { crawlController } from "../../src/controllers/v0/crawl";
import { crawlStatusController } from "../../src/controllers/crawl-status"; import { crawlStatusController } from "../../src/controllers/v0/crawl-status";
import { scrapeController } from "../../src/controllers/scrape"; import { scrapeController } from "../../src/controllers/v0/scrape";
import { crawlPreviewController } from "../../src/controllers/crawlPreview"; import { crawlPreviewController } from "../../src/controllers/v0/crawlPreview";
import { crawlJobStatusPreviewController } from "../../src/controllers/status"; import { crawlJobStatusPreviewController } from "../../src/controllers/v0/status";
import { searchController } from "../../src/controllers/search"; import { searchController } from "../../src/controllers/v0/search";
import { crawlCancelController } from "../../src/controllers/crawl-cancel"; import { crawlCancelController } from "../../src/controllers/v0/crawl-cancel";
import { keyAuthController } from "../../src/controllers/keyAuth"; import { keyAuthController } from "../../src/controllers/v0/keyAuth";
import { livenessController } from "../controllers/liveness"; import { livenessController } from "../controllers/v0/liveness";
import { readinessController } from "../controllers/readiness"; import { readinessController } from "../controllers/v0/readiness";
export const v0Router = express.Router(); export const v0Router = express.Router();

150
apps/api/src/routes/v1.ts Normal file
View File

@ -0,0 +1,150 @@
import express, { NextFunction, Request, Response } from "express";
import { crawlController } from "../controllers/v1/crawl";
// import { crawlStatusController } from "../../src/controllers/v1/crawl-status";
import { scrapeController } from "../../src/controllers/v1/scrape";
import { crawlStatusController } from "../controllers/v1/crawl-status";
import { mapController } from "../controllers/v1/map";
import { ErrorResponse, RequestWithAuth, RequestWithMaybeAuth } from "../controllers/v1/types";
import { RateLimiterMode } from "../types";
import { authenticateUser } from "../controllers/auth";
import { createIdempotencyKey } from "../services/idempotency/create";
import { validateIdempotencyKey } from "../services/idempotency/validate";
import { checkTeamCredits } from "../services/billing/credit_billing";
import expressWs from "express-ws";
import { crawlStatusWSController } from "../controllers/v1/crawl-status-ws";
import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist";
import { crawlCancelController } from "../controllers/v1/crawl-cancel";
import { Logger } from "../lib/logger";
// import { crawlPreviewController } from "../../src/controllers/v1/crawlPreview";
// import { crawlJobStatusPreviewController } from "../../src/controllers/v1/status";
// import { searchController } from "../../src/controllers/v1/search";
// import { crawlCancelController } from "../../src/controllers/v1/crawl-cancel";
// import { keyAuthController } from "../../src/controllers/v1/keyAuth";
// import { livenessController } from "../controllers/v1/liveness";
// import { readinessController } from "../controllers/v1/readiness";
function checkCreditsMiddleware(minimum?: number): (req: RequestWithAuth, res: Response, next: NextFunction) => void {
return (req, res, next) => {
(async () => {
if (!minimum && req.body) {
minimum = (req.body as any)?.limit ?? 1;
}
const { success, message, remainingCredits } = await checkTeamCredits(req.auth.team_id, minimum);
if (!success) {
Logger.error(`Insufficient credits: ${JSON.stringify({ team_id: req.auth.team_id, minimum, remainingCredits })}`);
return res.status(402).json({ success: false, error: "Insufficient credits" });
}
req.account = { remainingCredits }
next();
})()
.catch(err => next(err));
};
}
export function authMiddleware(rateLimiterMode: RateLimiterMode): (req: RequestWithMaybeAuth, res: Response, next: NextFunction) => void {
return (req, res, next) => {
(async () => {
const { success, team_id, error, status, plan } = await authenticateUser(
req,
res,
rateLimiterMode,
);
if (!success) {
return res.status(status).json({ success: false, error });
}
req.auth = { team_id, plan };
next();
})()
.catch(err => next(err));
}
}
function idempotencyMiddleware(req: Request, res: Response, next: NextFunction) {
(async () => {
if (req.headers["x-idempotency-key"]) {
const isIdempotencyValid = await validateIdempotencyKey(req);
if (!isIdempotencyValid) {
return res.status(409).json({ success: false, error: "Idempotency key already used" });
}
createIdempotencyKey(req);
}
next();
})()
.catch(err => next(err));
}
function blocklistMiddleware(req: Request, res: Response, next: NextFunction) {
if (req.body.url && isUrlBlocked(req.body.url)) {
return res.status(403).json({ success: false, error: "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions." });
}
next();
}
function wrap(controller: (req: Request, res: Response) => Promise<any>): (req: Request, res: Response, next: NextFunction) => any {
return (req, res, next) => {
controller(req, res)
.catch(err => next(err))
}
}
expressWs(express());
export const v1Router = express.Router();
v1Router.post(
"/scrape",
blocklistMiddleware,
authMiddleware(RateLimiterMode.Scrape),
checkCreditsMiddleware(1),
wrap(scrapeController)
);
v1Router.post(
"/crawl",
blocklistMiddleware,
authMiddleware(RateLimiterMode.Crawl),
idempotencyMiddleware,
checkCreditsMiddleware(),
wrap(crawlController)
);
v1Router.post(
"/map",
blocklistMiddleware,
authMiddleware(RateLimiterMode.Map),
checkCreditsMiddleware(1),
wrap(mapController)
);
v1Router.get(
"/crawl/:jobId",
authMiddleware(RateLimiterMode.CrawlStatus),
wrap(crawlStatusController)
);
v1Router.ws(
"/crawl/:jobId",
crawlStatusWSController
);
// v1Router.post("/crawlWebsitePreview", crawlPreviewController);
v1Router.delete(
"/crawl/:jobId",
authMiddleware(RateLimiterMode.Crawl),
crawlCancelController
);
// v1Router.get("/checkJobStatus/:jobId", crawlJobStatusPreviewController);
// // Auth route for key based authentication
// v1Router.get("/keyAuth", keyAuthController);
// // Search routes
// v0Router.post("/search", searchController);
// Health/Probe routes
// v1Router.get("/health/liveness", livenessController);
// v1Router.get("/health/readiness", readinessController);

apps/api/src/run-req.ts (new file, 175 lines)

@ -0,0 +1,175 @@
import axios from "axios";
import { promises as fs } from "fs";
import { v4 as uuidV4 } from "uuid";
interface Result {
start_url: string;
job_id?: string;
idempotency_key?: string;
result_data_jsonb?: any;
}
async function sendCrawl(result: Result): Promise<string | undefined> {
const idempotencyKey = uuidV4();
const url = result.start_url;
try {
const response = await axios.post(
"https://staging-firecrawl-scraper-js.fly.dev/v0/crawl",
{
url: url,
crawlerOptions: {
limit: 75,
},
pageOptions: {
includeHtml: true,
replaceAllPathsWithAbsolutePaths: true,
waitFor: 1000,
},
},
{
headers: {
"Content-Type": "application/json",
Authorization: `Bearer `,
},
}
);
result.idempotency_key = idempotencyKey;
return response.data.jobId;
} catch (error) {
console.error("Error sending crawl:", error);
return undefined;
}
}
async function getContent(result: Result): Promise<boolean> {
let attempts = 0;
while (attempts < 120) {
// Poll the job status for up to 120 attempts before giving up
try {
const response = await axios.get(
`https://staging-firecrawl-scraper-js.fly.dev/v0/crawl/status/${result.job_id}`,
{
headers: {
"Content-Type": "application/json",
Authorization: `Bearer `,
},
}
);
if (response.data.status === "completed") {
result.result_data_jsonb = response.data.data;
// Job actually completed
return true;
}
} catch (error) {
console.error("Error getting content:", error);
}
const randomSleep = Math.floor(Math.random() * 15000) + 5000;
await new Promise((resolve) => setTimeout(resolve, randomSleep)); // Sleep a random 5-20 seconds between status checks
attempts++;
}
// Set result as null if timed out
result.result_data_jsonb = null;
return false;
}
async function processResults(results: Result[]): Promise<void> {
let processedCount = 0;
let starterCount = 0;
const queue: Result[] = [];
const processedUrls = new Set<string>();
// Initialize the queue with the first 100 results
for (let i = 0; i < Math.min(100, results.length); i++) {
queue.push(results[i]);
processedUrls.add(results[i].start_url);
}
// Function to process a single result
const processSingleResult = async (result: Result) => {
const jobId = await sendCrawl(result);
if (jobId) {
console.log(`Job requested count: ${starterCount}`);
starterCount++;
result.job_id = jobId;
processedCount++;
// Save the result to the file
try {
// Save job id along with the start_url
const resultWithJobId = results.map(r => ({
start_url: r.start_url,
job_id: r.job_id,
}));
await fs.writeFile(
"results_with_job_id_4000_6000.json",
JSON.stringify(resultWithJobId, null, 4)
);
} catch (error) {
console.error("Error writing to results_with_content.json:", error);
}
// Add a new result to the queue if there are more results to process
// if (processedCount < results.length) {
// for (let i = queue.length; i < results.length; i++) {
// if (!processedUrls.has(results[i].start_url)) {
// const nextResult = results[i];
// console.log("Next result:", nextResult.start_url);
// queue.push(nextResult);
// processedUrls.add(nextResult.start_url);
// console.log(`Queue length: ${queue.length}`);
// processSingleResult(nextResult);
// break;
// }
// }
// }
}
};
// Start processing the initial queue concurrently
// for (let i = 0; i < queue.length; i++) {
// processSingleResult(queue[i]);
// if ((i + 1) % 500 === 0) {
// console.log(`Processed ${i + 1} results, waiting for 1 minute before adding the next batch...`);
// await new Promise(resolve => setTimeout(resolve, 60 * 1000)); // Wait for 1 minute
// }
// }
// Start processing the initial queue concurrently
// await Promise.all(queue.map(result => processSingleResult(result)));
for (let i = 0; i < results.length; i += 100) {
const batch = results.slice(i, i + 100);
Promise.all(batch.map((result) => processSingleResult(result)))
.then(() => {
console.log(`Processed ${i + 100} results.`);
})
.catch((error) => {
console.error(`Error processing batch starting at index ${i}:`, error);
});
await new Promise((resolve) => setTimeout(resolve, 60 * 1000)); // Wait for 1 minute
}
}
// Example call
async function getStartUrls(): Promise<Result[]> {
try {
const data = await fs.readFile("starturls.json", "utf-8");
return JSON.parse(data);
} catch (error) {
console.error("Error reading starturls.json:", error);
return [];
}
}
async function main() {
const results: Result[] = (await getStartUrls()).slice(3999, 6000);
// console.log(results.map((r) => r.start_url).slice(0, 3));
processResults(results)
.then(() => {
console.log("All results processed.");
})
.catch((error) => {
console.error("Error processing results:", error);
});
}
main();
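The script above fires batches of 100 crawl requests and then sleeps for a minute before starting the next batch. A generic version of that throttled-batch pattern, with illustrative helper names not taken from this commit, could look like:
```typescript
// Generic throttled batch runner: process items in fixed-size batches,
// waiting a fixed delay between batches, mirroring the loop in run-req.ts.
async function runInBatches<T>(
  items: T[],
  batchSize: number,
  delayMs: number,
  handler: (item: T) => Promise<void>
): Promise<void> {
  for (let i = 0; i < items.length; i += batchSize) {
    const batch = items.slice(i, i + batchSize);
    // Start the whole batch concurrently; log failures but keep going.
    Promise.all(batch.map(handler)).catch((err) =>
      console.error(`Batch starting at index ${i} failed:`, err)
    );
    await new Promise((resolve) => setTimeout(resolve, delayMs));
  }
}

// Usage sketch: 100 crawls per minute.
// await runInBatches(results, 100, 60_000, processSingleResult);
```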


@ -24,14 +24,15 @@ describe('scrapSingleUrl', () => {
}); });
it('should return a list of links on the firecrawl.ai page', async () => { it('should return a list of links on the firecrawl.ai page', async () => {
const url = 'https://example.com'; const url = 'https://flutterbricks.com';
const pageOptions: PageOptions = { includeHtml: true }; const pageOptions: PageOptions = { includeHtml: true };
const result = await scrapSingleUrl("TEST", url, pageOptions); const result = await scrapSingleUrl("TEST", url, pageOptions);
// Check if the result contains a list of links // Check if the result contains a list of links
expect(result.linksOnPage).toBeDefined(); expect(result.linksOnPage).toBeDefined();
console.log({result});
expect(Array.isArray(result.linksOnPage)).toBe(true); expect(Array.isArray(result.linksOnPage)).toBe(true);
expect(result.linksOnPage.length).toBeGreaterThan(0); expect(result.linksOnPage.length).toBeGreaterThan(0);
expect(result.linksOnPage).toContain('https://www.iana.org/domains/example') expect(result.linksOnPage).toContain('https://flutterbricks.com/features')
}, 10000); }, 15000);


@ -1,4 +1,4 @@
import axios from "axios"; import axios, { AxiosError } from "axios";
import cheerio, { load } from "cheerio"; import cheerio, { load } from "cheerio";
import { URL } from "url"; import { URL } from "url";
import { getLinksFromSitemap } from "./sitemap"; import { getLinksFromSitemap } from "./sitemap";
@ -22,7 +22,7 @@ export class WebCrawler {
private crawledUrls: Map<string, string> = new Map(); private crawledUrls: Map<string, string> = new Map();
private limit: number; private limit: number;
private robotsTxtUrl: string; private robotsTxtUrl: string;
private robots: any; public robots: any;
private generateImgAltText: boolean; private generateImgAltText: boolean;
private allowBackwardCrawling: boolean; private allowBackwardCrawling: boolean;
private allowExternalContentLinks: boolean; private allowExternalContentLinks: boolean;
@ -53,8 +53,8 @@ export class WebCrawler {
this.jobId = jobId; this.jobId = jobId;
this.initialUrl = initialUrl; this.initialUrl = initialUrl;
this.baseUrl = new URL(initialUrl).origin; this.baseUrl = new URL(initialUrl).origin;
this.includes = includes ?? []; this.includes = Array.isArray(includes) ? includes : [];
this.excludes = excludes ?? []; this.excludes = Array.isArray(excludes) ? excludes : [];
this.limit = limit; this.limit = limit;
this.robotsTxtUrl = `${this.baseUrl}/robots.txt`; this.robotsTxtUrl = `${this.baseUrl}/robots.txt`;
this.robots = robotsParser(this.robotsTxtUrl, ""); this.robots = robotsParser(this.robotsTxtUrl, "");
@ -66,10 +66,16 @@ export class WebCrawler {
this.allowExternalContentLinks = allowExternalContentLinks ?? false; this.allowExternalContentLinks = allowExternalContentLinks ?? false;
} }
private filterLinks(sitemapLinks: string[], limit: number, maxDepth: number): string[] { public filterLinks(sitemapLinks: string[], limit: number, maxDepth: number): string[] {
return sitemapLinks return sitemapLinks
.filter((link) => { .filter((link) => {
const url = new URL(link.trim(), this.baseUrl); let url: URL;
try {
url = new URL(link.trim(), this.baseUrl);
} catch (error) {
Logger.debug(`Error processing link: ${link} | Error: ${error.message}`);
return false;
}
const path = url.pathname; const path = url.pathname;
const depth = getURLDepth(url.toString()); const depth = getURLDepth(url.toString());
@ -102,7 +108,12 @@ export class WebCrawler {
// Normalize the initial URL and the link to account for www and non-www versions // Normalize the initial URL and the link to account for www and non-www versions
const normalizedInitialUrl = new URL(this.initialUrl); const normalizedInitialUrl = new URL(this.initialUrl);
const normalizedLink = new URL(link); let normalizedLink;
try {
normalizedLink = new URL(link);
} catch (_) {
return false;
}
const initialHostname = normalizedInitialUrl.hostname.replace(/^www\./, ''); const initialHostname = normalizedInitialUrl.hostname.replace(/^www\./, '');
const linkHostname = normalizedLink.hostname.replace(/^www\./, ''); const linkHostname = normalizedLink.hostname.replace(/^www\./, '');
@ -130,6 +141,25 @@ export class WebCrawler {
.slice(0, limit); .slice(0, limit);
} }
public async getRobotsTxt(): Promise<string> {
const response = await axios.get(this.robotsTxtUrl, { timeout: axiosTimeout });
return response.data;
}
public importRobotsTxt(txt: string) {
this.robots = robotsParser(this.robotsTxtUrl, txt);
}
public async tryGetSitemap(): Promise<{ url: string; html: string; }[] | null> {
Logger.debug(`Fetching sitemap links from ${this.initialUrl}`);
const sitemapLinks = await this.tryFetchSitemapLinks(this.initialUrl);
if (sitemapLinks.length > 0) {
let filteredLinks = this.filterLinks(sitemapLinks, this.limit, this.maxCrawledDepth);
return filteredLinks.map(link => ({ url: link, html: "" }));
}
return null;
}
public async start( public async start(
inProgress?: (progress: Progress) => void, inProgress?: (progress: Progress) => void,
pageOptions?: PageOptions, pageOptions?: PageOptions,
@ -142,19 +172,17 @@ export class WebCrawler {
Logger.debug(`Crawler starting with ${this.initialUrl}`); Logger.debug(`Crawler starting with ${this.initialUrl}`);
// Fetch and parse robots.txt // Fetch and parse robots.txt
try { try {
const response = await axios.get(this.robotsTxtUrl, { timeout: axiosTimeout }); const txt = await this.getRobotsTxt();
this.robots = robotsParser(this.robotsTxtUrl, response.data); this.importRobotsTxt(txt);
Logger.debug(`Crawler robots.txt fetched with ${this.robotsTxtUrl}`); Logger.debug(`Crawler robots.txt fetched with ${this.robotsTxtUrl}`);
} catch (error) { } catch (error) {
Logger.debug(`Failed to fetch robots.txt from ${this.robotsTxtUrl}`); Logger.debug(`Failed to fetch robots.txt from ${this.robotsTxtUrl}`);
} }
if (!crawlerOptions?.ignoreSitemap){ if (!crawlerOptions?.ignoreSitemap){
Logger.debug(`Fetching sitemap links from ${this.initialUrl}`); const sm = await this.tryGetSitemap();
const sitemapLinks = await this.tryFetchSitemapLinks(this.initialUrl); if (sm !== null) {
if (sitemapLinks.length > 0) { return sm;
let filteredLinks = this.filterLinks(sitemapLinks, limit, maxDepth);
return filteredLinks.map(link => ({ url: link, html: "" }));
} }
} }
@ -241,6 +269,63 @@ export class WebCrawler {
return Array.from(this.crawledUrls.entries()).map(([url, html]) => ({ url, html })); return Array.from(this.crawledUrls.entries()).map(([url, html]) => ({ url, html }));
} }
public filterURL(href: string, url: string): string | null {
let fullUrl = href;
if (!href.startsWith("http")) {
try {
fullUrl = new URL(href, this.baseUrl).toString();
} catch (_) {
return null;
}
}
let urlObj;
try {
urlObj = new URL(fullUrl);
} catch (_) {
return null;
}
const path = urlObj.pathname;
if (this.isInternalLink(fullUrl)) { // INTERNAL LINKS
if (this.isInternalLink(fullUrl) &&
this.noSections(fullUrl) &&
!this.matchesExcludes(path) &&
this.isRobotsAllowed(fullUrl)
) {
return fullUrl;
}
} else { // EXTERNAL LINKS
if (
this.isInternalLink(url) &&
this.allowExternalContentLinks &&
!this.isSocialMediaOrEmail(fullUrl) &&
!this.matchesExcludes(fullUrl, true) &&
!this.isExternalMainPage(fullUrl)
) {
return fullUrl;
}
}
return null;
}
public extractLinksFromHTML(html: string, url: string) {
let links: string[] = [];
const $ = load(html);
$("a").each((_, element) => {
const href = $(element).attr("href");
if (href) {
const u = this.filterURL(href, url);
if (u !== null) {
links.push(u);
}
}
});
return links;
}
async crawl(url: string, pageOptions: PageOptions): Promise<{url: string, html: string, pageStatusCode?: number, pageError?: string}[]> { async crawl(url: string, pageOptions: PageOptions): Promise<{url: string, html: string, pageStatusCode?: number, pageError?: string}[]> {
if (this.visited.has(url) || !this.robots.isAllowed(url, "FireCrawlAgent")) { if (this.visited.has(url) || !this.robots.isAllowed(url, "FireCrawlAgent")) {
return []; return [];
@ -284,37 +369,7 @@ export class WebCrawler {
links.push({ url, html: content, pageStatusCode, pageError }); links.push({ url, html: content, pageStatusCode, pageError });
} }
$("a").each((_, element) => { links.push(...this.extractLinksFromHTML(content, url).map(url => ({ url, html: content, pageStatusCode, pageError })));
const href = $(element).attr("href");
if (href) {
let fullUrl = href;
if (!href.startsWith("http")) {
fullUrl = new URL(href, this.baseUrl).toString();
}
const urlObj = new URL(fullUrl);
const path = urlObj.pathname;
if (this.isInternalLink(fullUrl)) { // INTERNAL LINKS
if (this.isInternalLink(fullUrl) &&
this.noSections(fullUrl) &&
!this.matchesExcludes(path) &&
this.isRobotsAllowed(fullUrl)
) {
links.push({ url: fullUrl, html: content, pageStatusCode, pageError });
}
} else { // EXTERNAL LINKS
if (
this.isInternalLink(url) &&
this.allowExternalContentLinks &&
!this.isSocialMediaOrEmail(fullUrl) &&
!this.matchesExcludes(fullUrl, true) &&
!this.isExternalMainPage(fullUrl)
) {
links.push({ url: fullUrl, html: content, pageStatusCode, pageError });
}
}
}
});
if (this.visited.size === 1) { if (this.visited.size === 1) {
return links; return links;
@ -465,9 +520,13 @@ export class WebCrawler {
} }
} catch (error) { } catch (error) {
Logger.debug(`Failed to fetch sitemap with axios from ${sitemapUrl}: ${error}`); Logger.debug(`Failed to fetch sitemap with axios from ${sitemapUrl}: ${error}`);
const response = await getLinksFromSitemap({ sitemapUrl, mode: 'fire-engine' }); if (error instanceof AxiosError && error.response?.status === 404) {
if (response) { // ignore 404
sitemapLinks = response; } else {
const response = await getLinksFromSitemap({ sitemapUrl, mode: 'fire-engine' });
if (response) {
sitemapLinks = response;
}
} }
} }
@ -480,7 +539,11 @@ export class WebCrawler {
} }
} catch (error) { } catch (error) {
Logger.debug(`Failed to fetch sitemap from ${baseUrlSitemap}: ${error}`); Logger.debug(`Failed to fetch sitemap from ${baseUrlSitemap}: ${error}`);
sitemapLinks = await getLinksFromSitemap({ sitemapUrl: baseUrlSitemap, mode: 'fire-engine' }); if (error instanceof AxiosError && error.response?.status === 404) {
// ignore 404
} else {
sitemapLinks = await getLinksFromSitemap({ sitemapUrl: baseUrlSitemap, mode: 'fire-engine' });
}
} }
} }
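The crawler changes above make URL handling defensive: filterURL and extractLinksFromHTML are now public, and every new URL(...) call is wrapped so a single malformed href no longer throws out of the crawl. A standalone sketch of that extract-and-normalize pattern (simplified, not the actual class method):
```typescript
import { load } from "cheerio";

// Simplified version of extractLinksFromHTML above: collect <a href> values,
// resolve them against the page URL, and silently skip anything unparseable.
export function extractAbsoluteLinks(html: string, baseUrl: string): string[] {
  const links: string[] = [];
  const $ = load(html);
  $("a").each((_, element) => {
    const href = $(element).attr("href");
    if (!href) return;
    try {
      links.push(new URL(href, baseUrl).toString());
    } catch {
      // Malformed href; skip it instead of aborting the whole page.
    }
  });
  return links;
}
```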


@ -16,7 +16,6 @@ import {
replacePathsWithAbsolutePaths, replacePathsWithAbsolutePaths,
} from "./utils/replacePaths"; } from "./utils/replacePaths";
import { generateCompletions } from "../../lib/LLM-extraction"; import { generateCompletions } from "../../lib/LLM-extraction";
import { getWebScraperQueue } from "../../../src/services/queue-service";
import { fetchAndProcessDocx } from "./utils/docxProcessor"; import { fetchAndProcessDocx } from "./utils/docxProcessor";
import { getAdjustedMaxDepth, getURLDepth } from "./utils/maxDepthUtils"; import { getAdjustedMaxDepth, getURLDepth } from "./utils/maxDepthUtils";
import { Logger } from "../../lib/logger"; import { Logger } from "../../lib/logger";
@ -44,6 +43,8 @@ export class WebScraperDataProvider {
private crawlerMode: string = "default"; private crawlerMode: string = "default";
private allowBackwardCrawling: boolean = false; private allowBackwardCrawling: boolean = false;
private allowExternalContentLinks: boolean = false; private allowExternalContentLinks: boolean = false;
private priority?: number;
private teamId?: string;
authorize(): void { authorize(): void {
throw new Error("Method not implemented."); throw new Error("Method not implemented.");
@ -72,7 +73,9 @@ export class WebScraperDataProvider {
url, url,
this.pageOptions, this.pageOptions,
this.extractorOptions, this.extractorOptions,
existingHTML existingHTML,
this.priority,
this.teamId,
); );
processedUrls++; processedUrls++;
if (inProgress) { if (inProgress) {
@ -88,21 +91,6 @@ export class WebScraperDataProvider {
results[i + index] = result; results[i + index] = result;
}) })
); );
try {
if (this.mode === "crawl" && this.bullJobId) {
const job = await getWebScraperQueue().getJob(this.bullJobId);
const jobStatus = await job.getState();
if (jobStatus === "failed") {
Logger.info(
"Job has failed or has been cancelled by the user. Stopping the job..."
);
return [] as Document[];
}
}
} catch (error) {
Logger.error(error.message);
return [] as Document[];
}
} }
return results.filter((result) => result !== null) as Document[]; return results.filter((result) => result !== null) as Document[];
} }
@ -306,7 +294,16 @@ export class WebScraperDataProvider {
documents = await this.getSitemapData(this.urls[0], documents); documents = await this.getSitemapData(this.urls[0], documents);
} }
documents = this.applyPathReplacements(documents); if (this.pageOptions.includeMarkdown) {
documents = this.applyPathReplacements(documents);
}
if (!this.pageOptions.includeHtml) {
for (let document of documents) {
delete document.html;
}
}
// documents = await this.applyImgAltText(documents); // documents = await this.applyImgAltText(documents);
if ( if (
(this.extractorOptions.mode === "llm-extraction" || (this.extractorOptions.mode === "llm-extraction" ||
@ -359,6 +356,7 @@ export class WebScraperDataProvider {
}); });
return { return {
content: content, content: content,
markdown: content,
metadata: { sourceURL: pdfLink, pageStatusCode, pageError }, metadata: { sourceURL: pdfLink, pageStatusCode, pageError },
provider: "web-scraper", provider: "web-scraper",
}; };
@ -581,12 +579,20 @@ export class WebScraperDataProvider {
this.limit = options.crawlerOptions?.limit ?? 10000; this.limit = options.crawlerOptions?.limit ?? 10000;
this.generateImgAltText = this.generateImgAltText =
options.crawlerOptions?.generateImgAltText ?? false; options.crawlerOptions?.generateImgAltText ?? false;
this.pageOptions = options.pageOptions ?? { this.pageOptions = {
onlyMainContent: false, onlyMainContent: options.pageOptions?.onlyMainContent ?? false,
includeHtml: false, includeHtml: options.pageOptions?.includeHtml ?? false,
replaceAllPathsWithAbsolutePaths: false, replaceAllPathsWithAbsolutePaths: options.pageOptions?.replaceAllPathsWithAbsolutePaths ?? true,
parsePDF: true, parsePDF: options.pageOptions?.parsePDF ?? true,
removeTags: [], onlyIncludeTags: options.pageOptions?.onlyIncludeTags ?? [],
removeTags: options.pageOptions?.removeTags ?? [],
includeMarkdown: options.pageOptions?.includeMarkdown ?? true,
includeRawHtml: options.pageOptions?.includeRawHtml ?? false,
waitFor: options.pageOptions?.waitFor ?? undefined,
headers: options.pageOptions?.headers ?? undefined,
includeLinks: options.pageOptions?.includeLinks ?? true,
fullPageScreenshot: options.pageOptions?.fullPageScreenshot ?? false,
screenshot: options.pageOptions?.screenshot ?? false,
}; };
this.extractorOptions = options.extractorOptions ?? { mode: "markdown" }; this.extractorOptions = options.extractorOptions ?? { mode: "markdown" };
this.replaceAllPathsWithAbsolutePaths = this.replaceAllPathsWithAbsolutePaths =
@ -608,6 +614,8 @@ export class WebScraperDataProvider {
options.crawlerOptions?.allowBackwardCrawling ?? false; options.crawlerOptions?.allowBackwardCrawling ?? false;
this.allowExternalContentLinks = this.allowExternalContentLinks =
options.crawlerOptions?.allowExternalContentLinks ?? false; options.crawlerOptions?.allowExternalContentLinks ?? false;
this.priority = options.priority;
this.teamId = options.teamId ?? null;
// make sure all urls start with https:// // make sure all urls start with https://
this.urls = this.urls.map((url) => { this.urls = this.urls.map((url) => {


@ -5,6 +5,7 @@ import { generateRequestParams } from "../single_url";
import { fetchAndProcessPdf } from "../utils/pdfProcessor"; import { fetchAndProcessPdf } from "../utils/pdfProcessor";
import { universalTimeout } from "../global"; import { universalTimeout } from "../global";
import { Logger } from "../../../lib/logger"; import { Logger } from "../../../lib/logger";
import * as Sentry from "@sentry/node";
/** /**
* Scrapes a URL with Fire-Engine * Scrapes a URL with Fire-Engine
@ -22,19 +23,23 @@ export async function scrapWithFireEngine({
waitFor = 0, waitFor = 0,
screenshot = false, screenshot = false,
fullPageScreenshot = false, fullPageScreenshot = false,
pageOptions = { parsePDF: true }, pageOptions = { parsePDF: true, atsv: false, useFastMode: false, disableJsDom: false },
fireEngineOptions = {}, fireEngineOptions = {},
headers, headers,
options, options,
priority,
teamId,
}: { }: {
url: string; url: string;
waitFor?: number; waitFor?: number;
screenshot?: boolean; screenshot?: boolean;
fullPageScreenshot?: boolean; fullPageScreenshot?: boolean;
pageOptions?: { scrollXPaths?: string[]; parsePDF?: boolean }; pageOptions?: { scrollXPaths?: string[]; parsePDF?: boolean, atsv?: boolean, useFastMode?: boolean, disableJsDom?: boolean };
fireEngineOptions?: FireEngineOptions; fireEngineOptions?: FireEngineOptions;
headers?: Record<string, string>; headers?: Record<string, string>;
options?: any; options?: any;
priority?: number;
teamId?: string;
}): Promise<FireEngineResponse> { }): Promise<FireEngineResponse> {
const logParams = { const logParams = {
url, url,
@ -49,11 +54,11 @@ export async function scrapWithFireEngine({
try { try {
const reqParams = await generateRequestParams(url); const reqParams = await generateRequestParams(url);
const waitParam = reqParams["params"]?.wait ?? waitFor; let waitParam = reqParams["params"]?.wait ?? waitFor;
const engineParam = reqParams["params"]?.engine ?? reqParams["params"]?.fireEngineOptions?.engine ?? fireEngineOptions?.engine ?? "playwright"; let engineParam = reqParams["params"]?.engine ?? reqParams["params"]?.fireEngineOptions?.engine ?? fireEngineOptions?.engine ?? "playwright";
const screenshotParam = reqParams["params"]?.screenshot ?? screenshot; let screenshotParam = reqParams["params"]?.screenshot ?? screenshot;
const fullPageScreenshotParam = reqParams["params"]?.fullPageScreenshot ?? fullPageScreenshot; let fullPageScreenshotParam = reqParams["params"]?.fullPageScreenshot ?? fullPageScreenshot;
const fireEngineOptionsParam : FireEngineOptions = reqParams["params"]?.fireEngineOptions ?? fireEngineOptions; let fireEngineOptionsParam : FireEngineOptions = reqParams["params"]?.fireEngineOptions ?? fireEngineOptions;
let endpoint = "/scrape"; let endpoint = "/scrape";
@ -68,47 +73,101 @@ export async function scrapWithFireEngine({
`⛏️ Fire-Engine (${engine}): Scraping ${url} | params: { wait: ${waitParam}, screenshot: ${screenshotParam}, fullPageScreenshot: ${fullPageScreenshot}, method: ${fireEngineOptionsParam?.method ?? "null"} }` `⛏️ Fire-Engine (${engine}): Scraping ${url} | params: { wait: ${waitParam}, screenshot: ${screenshotParam}, fullPageScreenshot: ${fullPageScreenshot}, method: ${fireEngineOptionsParam?.method ?? "null"} }`
); );
if (pageOptions?.useFastMode) {
fireEngineOptionsParam.engine = "tlsclient";
engine = "tlsclient";
}
const response = await axios.post( // atsv is only available for beta customers
process.env.FIRE_ENGINE_BETA_URL + endpoint, const betaCustomersString = process.env.BETA_CUSTOMERS;
{ const betaCustomers = betaCustomersString ? betaCustomersString.split(",") : [];
url: url,
wait: waitParam, if (pageOptions?.atsv && betaCustomers.includes(teamId)) {
screenshot: screenshotParam, fireEngineOptionsParam.atsv = true;
fullPageScreenshot: fullPageScreenshotParam, } else {
headers: headers, pageOptions.atsv = false;
pageOptions: pageOptions, }
...fireEngineOptionsParam,
}, const axiosInstance = axios.create({
{ headers: { "Content-Type": "application/json" }
headers: { });
"Content-Type": "application/json",
const startTime = Date.now();
const _response = await Sentry.startSpan({
name: "Call to fire-engine"
}, async span => {
return await axiosInstance.post(
process.env.FIRE_ENGINE_BETA_URL + endpoint,
{
url: url,
wait: waitParam,
screenshot: screenshotParam,
fullPageScreenshot: fullPageScreenshotParam,
headers: headers,
pageOptions: pageOptions,
disableJsDom: pageOptions?.disableJsDom ?? false,
priority,
engine,
instantReturn: true,
...fireEngineOptionsParam,
}, },
timeout: universalTimeout + waitParam, {
} headers: {
); "Content-Type": "application/json",
...(Sentry.isInitialized() ? ({
"sentry-trace": Sentry.spanToTraceHeader(span),
"baggage": Sentry.spanToBaggageHeader(span),
}) : {}),
}
}
);
});
if (response.status !== 200) { let checkStatusResponse = await axiosInstance.get(`${process.env.FIRE_ENGINE_BETA_URL}/scrape/${_response.data.jobId}`);
while (checkStatusResponse.data.processing && Date.now() - startTime < universalTimeout + waitParam) {
await new Promise(resolve => setTimeout(resolve, 1000)); // wait 1 second
checkStatusResponse = await axiosInstance.get(`${process.env.FIRE_ENGINE_BETA_URL}/scrape/${_response.data.jobId}`);
}
if (checkStatusResponse.data.processing) {
Logger.debug(`⛏️ Fire-Engine (${engine}): deleting request - jobId: ${_response.data.jobId}`);
axiosInstance.delete(
process.env.FIRE_ENGINE_BETA_URL + `/scrape/${_response.data.jobId}`, {
validateStatus: (status) => true
}
).catch((error) => {
Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to delete request - jobId: ${_response.data.jobId} | error: ${error}`);
});
Logger.debug(`⛏️ Fire-Engine (${engine}): Request timed out for ${url}`);
logParams.error_message = "Request timed out";
return { html: "", screenshot: "", pageStatusCode: null, pageError: "" };
}
if (checkStatusResponse.status !== 200 || checkStatusResponse.data.error) {
Logger.debug( Logger.debug(
`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}` `⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${checkStatusResponse.status}`
); );
logParams.error_message = response.data?.pageError; logParams.error_message = checkStatusResponse.data?.pageError ?? checkStatusResponse.data?.error;
logParams.response_code = response.data?.pageStatusCode; logParams.response_code = checkStatusResponse.data?.pageStatusCode;
if(response.data && response.data?.pageStatusCode !== 200) { if(checkStatusResponse.data && checkStatusResponse.data?.pageStatusCode !== 200) {
Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}`); Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${checkStatusResponse.data?.pageStatusCode}`);
} }
const pageStatusCode = checkStatusResponse.data?.pageStatusCode ? checkStatusResponse.data?.pageStatusCode : checkStatusResponse.data?.error && checkStatusResponse.data?.error.includes("Dns resolution error for hostname") ? 404 : undefined;
return { return {
html: "", html: "",
screenshot: "", screenshot: "",
pageStatusCode: response.data?.pageStatusCode, pageStatusCode,
pageError: response.data?.pageError, pageError: checkStatusResponse.data?.pageError ?? checkStatusResponse.data?.error,
}; };
} }
const contentType = response.headers["content-type"]; const contentType = checkStatusResponse.data.responseHeaders?.["content-type"];
if (contentType && contentType.includes("application/pdf")) { if (contentType && contentType.includes("application/pdf")) {
const { content, pageStatusCode, pageError } = await fetchAndProcessPdf( const { content, pageStatusCode, pageError } = await fetchAndProcessPdf(
url, url,
@ -119,18 +178,19 @@ export async function scrapWithFireEngine({
logParams.error_message = pageError; logParams.error_message = pageError;
return { html: content, screenshot: "", pageStatusCode, pageError }; return { html: content, screenshot: "", pageStatusCode, pageError };
} else { } else {
const data = response.data; const data = checkStatusResponse.data;
logParams.success = logParams.success =
(data.pageStatusCode >= 200 && data.pageStatusCode < 300) || (data.pageStatusCode >= 200 && data.pageStatusCode < 300) ||
data.pageStatusCode === 404; data.pageStatusCode === 404;
logParams.html = data.content ?? ""; logParams.html = data.content ?? "";
logParams.response_code = data.pageStatusCode; logParams.response_code = data.pageStatusCode;
logParams.error_message = data.pageError; logParams.error_message = data.pageError ?? data.error;
return { return {
html: data.content ?? "", html: data.content ?? "",
screenshot: data.screenshot ?? "", screenshot: data.screenshot ?? "",
pageStatusCode: data.pageStatusCode, pageStatusCode: data.pageStatusCode,
pageError: data.pageError, pageError: data.pageError ?? data.error,
}; };
} }
} catch (error) { } catch (error) {
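The fire-engine scraper now submits with instantReturn: true and polls GET /scrape/{jobId} once a second until the job stops processing or the timeout elapses, instead of holding one long-lived POST. A generic sketch of that submit-then-poll flow, loosely mirroring the code above (endpoint paths and response fields are assumptions, not an exact client):
```typescript
import axios from "axios";

// Generic async-job client: POST to enqueue, then poll the status endpoint
// once a second until the job stops processing or the deadline passes.
async function submitAndPoll<T>(
  baseUrl: string,
  payload: Record<string, unknown>,
  timeoutMs: number
): Promise<T | null> {
  const { data } = await axios.post(`${baseUrl}/scrape`, { ...payload, instantReturn: true });
  const deadline = Date.now() + timeoutMs;

  while (Date.now() < deadline) {
    const status = await axios.get(`${baseUrl}/scrape/${data.jobId}`);
    if (!status.data.processing) {
      return status.data as T;
    }
    await new Promise((resolve) => setTimeout(resolve, 1000));
  }

  // Timed out: best-effort cancel, mirroring the DELETE issued in the code above.
  await axios
    .delete(`${baseUrl}/scrape/${data.jobId}`, { validateStatus: () => true })
    .catch(() => {});
  return null;
}
```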


@ -43,6 +43,9 @@ export async function scrapWithScrapingBee(
transparent_status_code: "True", transparent_status_code: "True",
}, },
}); });
Logger.info(
`⛏️ ScrapingBee: Scraping ${url}`
);
const contentType = response.headers["content-type"]; const contentType = response.headers["content-type"];
if (contentType && contentType.includes("application/pdf")) { if (contentType && contentType.includes("application/pdf")) {
logParams.success = true; logParams.success = true;


@ -24,8 +24,8 @@ import { clientSideError } from "../../strings";
dotenv.config(); dotenv.config();
export const baseScrapers = [ export const baseScrapers = [
"fire-engine",
"fire-engine;chrome-cdp", "fire-engine;chrome-cdp",
"fire-engine",
"scrapingBee", "scrapingBee",
process.env.USE_DB_AUTHENTICATION ? undefined : "playwright", process.env.USE_DB_AUTHENTICATION ? undefined : "playwright",
"scrapingBeeLoad", "scrapingBeeLoad",
@ -85,8 +85,8 @@ function getScrapingFallbackOrder(
}); });
let defaultOrder = [ let defaultOrder = [
!process.env.USE_DB_AUTHENTICATION ? undefined : "fire-engine",
!process.env.USE_DB_AUTHENTICATION ? undefined : "fire-engine;chrome-cdp", !process.env.USE_DB_AUTHENTICATION ? undefined : "fire-engine;chrome-cdp",
!process.env.USE_DB_AUTHENTICATION ? undefined : "fire-engine",
"scrapingBee", "scrapingBee",
process.env.USE_DB_AUTHENTICATION ? undefined : "playwright", process.env.USE_DB_AUTHENTICATION ? undefined : "playwright",
"scrapingBeeLoad", "scrapingBeeLoad",
@ -122,20 +122,38 @@ function getScrapingFallbackOrder(
export async function scrapSingleUrl( export async function scrapSingleUrl(
jobId: string, jobId: string,
urlToScrap: string, urlToScrap: string,
pageOptions: PageOptions = { pageOptions: PageOptions,
onlyMainContent: true, extractorOptions?: ExtractorOptions,
includeHtml: false, existingHtml?: string,
includeRawHtml: false, priority?: number,
waitFor: 0, teamId?: string
screenshot: false,
fullPageScreenshot: false,
headers: undefined,
},
extractorOptions: ExtractorOptions = {
mode: "llm-extraction-from-markdown",
},
existingHtml: string = ""
): Promise<Document> { ): Promise<Document> {
pageOptions = {
includeMarkdown: pageOptions.includeMarkdown ?? true,
onlyMainContent: pageOptions.onlyMainContent ?? false,
includeHtml: pageOptions.includeHtml ?? false,
includeRawHtml: pageOptions.includeRawHtml ?? false,
waitFor: pageOptions.waitFor ?? undefined,
screenshot: pageOptions.screenshot ?? false,
fullPageScreenshot: pageOptions.fullPageScreenshot ?? false,
headers: pageOptions.headers ?? undefined,
includeLinks: pageOptions.includeLinks ?? true,
replaceAllPathsWithAbsolutePaths: pageOptions.replaceAllPathsWithAbsolutePaths ?? true,
parsePDF: pageOptions.parsePDF ?? true,
removeTags: pageOptions.removeTags ?? [],
onlyIncludeTags: pageOptions.onlyIncludeTags ?? [],
}
if (extractorOptions) {
extractorOptions = {
mode: extractorOptions?.mode ?? "llm-extraction-from-markdown",
}
}
if (!existingHtml) {
existingHtml = "";
}
urlToScrap = urlToScrap.trim(); urlToScrap = urlToScrap.trim();
const attemptScraping = async ( const attemptScraping = async (
@ -163,7 +181,7 @@ export async function scrapSingleUrl(
case "fire-engine;chrome-cdp": case "fire-engine;chrome-cdp":
let engine: "playwright" | "chrome-cdp" | "tlsclient" = "playwright"; let engine: "playwright" | "chrome-cdp" | "tlsclient" = "playwright";
if(method === "fire-engine;chrome-cdp"){ if (method === "fire-engine;chrome-cdp") {
engine = "chrome-cdp"; engine = "chrome-cdp";
} }
@ -177,7 +195,10 @@ export async function scrapSingleUrl(
headers: pageOptions.headers, headers: pageOptions.headers,
fireEngineOptions: { fireEngineOptions: {
engine: engine, engine: engine,
} atsv: pageOptions.atsv,
},
priority,
teamId,
}); });
scraperResponse.text = response.html; scraperResponse.text = response.html;
scraperResponse.screenshot = response.screenshot; scraperResponse.screenshot = response.screenshot;
@ -336,11 +357,11 @@ export async function scrapSingleUrl(
pageError = undefined; pageError = undefined;
} }
if (text && text.trim().length >= 100) { if ((text && text.trim().length >= 100) || (typeof screenshot === "string" && screenshot.length > 0)) {
Logger.debug(`⛏️ ${scraper}: Successfully scraped ${urlToScrap} with text length >= 100, breaking`); Logger.debug(`⛏️ ${scraper}: Successfully scraped ${urlToScrap} with text length >= 100 or screenshot, breaking`);
break; break;
} }
if (pageStatusCode && pageStatusCode == 404) { if (pageStatusCode && (pageStatusCode == 404 || pageStatusCode == 500)) {
Logger.debug(`⛏️ ${scraper}: Successfully scraped ${urlToScrap} with status code 404, breaking`); Logger.debug(`⛏️ ${scraper}: Successfully scraped ${urlToScrap} with status code 404, breaking`);
break; break;
} }
@ -359,20 +380,22 @@ export async function scrapSingleUrl(
let linksOnPage: string[] | undefined; let linksOnPage: string[] | undefined;
linksOnPage = extractLinks(rawHtml, urlToScrap); if (pageOptions.includeLinks) {
linksOnPage = extractLinks(rawHtml, urlToScrap);
}
let document: Document; let document: Document;
if (screenshot && screenshot.length > 0) { if (screenshot && screenshot.length > 0) {
document = { document = {
content: text, content: text,
markdown: text, markdown: pageOptions.includeMarkdown ? text : undefined,
html: pageOptions.includeHtml ? html : undefined, html: pageOptions.includeHtml ? html : undefined,
rawHtml: rawHtml:
pageOptions.includeRawHtml || pageOptions.includeRawHtml ||
extractorOptions.mode === "llm-extraction-from-raw-html" extractorOptions?.mode === "llm-extraction-from-raw-html"
? rawHtml ? rawHtml
: undefined, : undefined,
linksOnPage, linksOnPage: pageOptions.includeLinks ? linksOnPage : undefined,
metadata: { metadata: {
...metadata, ...metadata,
screenshot: screenshot, screenshot: screenshot,
@ -384,11 +407,11 @@ export async function scrapSingleUrl(
} else { } else {
document = { document = {
content: text, content: text,
markdown: text, markdown: pageOptions.includeMarkdown ? text : undefined,
html: pageOptions.includeHtml ? html : undefined, html: pageOptions.includeHtml ? html : undefined,
rawHtml: rawHtml:
pageOptions.includeRawHtml || pageOptions.includeRawHtml ||
extractorOptions.mode === "llm-extraction-from-raw-html" extractorOptions?.mode === "llm-extraction-from-raw-html"
? rawHtml ? rawHtml
: undefined, : undefined,
metadata: { metadata: {
@ -397,7 +420,7 @@ export async function scrapSingleUrl(
pageStatusCode: pageStatusCode, pageStatusCode: pageStatusCode,
pageError: pageError, pageError: pageError,
}, },
linksOnPage, linksOnPage: pageOptions.includeLinks ? linksOnPage : undefined,
}; };
} }
@ -411,9 +434,9 @@ export async function scrapSingleUrl(
}); });
return { return {
content: "", content: "",
markdown: "", markdown: pageOptions.includeMarkdown ? "" : undefined,
html: "", html: "",
linksOnPage: [], linksOnPage: pageOptions.includeLinks ? [] : undefined,
metadata: { metadata: {
sourceURL: urlToScrap, sourceURL: urlToScrap,
pageStatusCode: pageStatusCode, pageStatusCode: pageStatusCode,


@ -8,7 +8,6 @@ describe('Blocklist Functionality', () => {
'https://twitter.com/home', 'https://twitter.com/home',
'https://instagram.com/explore', 'https://instagram.com/explore',
'https://linkedin.com/in/johndoe', 'https://linkedin.com/in/johndoe',
'https://pinterest.com/pin/create',
'https://snapchat.com/add/johndoe', 'https://snapchat.com/add/johndoe',
'https://tiktok.com/@johndoe', 'https://tiktok.com/@johndoe',
'https://reddit.com/r/funny', 'https://reddit.com/r/funny',


@ -8,7 +8,6 @@ describe('isUrlBlocked', () => {
'https://twitter.com/someuser', 'https://twitter.com/someuser',
'https://instagram.com/someuser', 'https://instagram.com/someuser',
'https://www.linkedin.com/in/someuser', 'https://www.linkedin.com/in/someuser',
'https://pinterest.com/someuser',
'https://snapchat.com/someuser', 'https://snapchat.com/someuser',
'https://tiktok.com/@someuser', 'https://tiktok.com/@someuser',
'https://reddit.com/r/somesubreddit', 'https://reddit.com/r/somesubreddit',


@ -6,7 +6,6 @@ const socialMediaBlocklist = [
'twitter.com', 'twitter.com',
'instagram.com', 'instagram.com',
'linkedin.com', 'linkedin.com',
'pinterest.com',
'snapchat.com', 'snapchat.com',
'tiktok.com', 'tiktok.com',
'reddit.com', 'reddit.com',
@ -15,6 +14,11 @@ const socialMediaBlocklist = [
'whatsapp.com', 'whatsapp.com',
'wechat.com', 'wechat.com',
'telegram.org', 'telegram.org',
'researchhub.com',
'youtube.com',
'corterix.com',
'southwest.com',
'ryanair.com'
]; ];
const allowedKeywords = [ const allowedKeywords = [


@ -234,5 +234,13 @@ export const urlSpecificParams = {
engine: "tlsclient", engine: "tlsclient",
}, },
}, },
},
"zoopla.co.uk":{
defaultScraper: "fire-engine",
params:{
fireEngineOptions:{
engine: "chrome-cdp",
},
},
} }
}; };


@ -75,9 +75,7 @@ export function extractMetadata(soup: CheerioAPI, url: string): Metadata {
description = soup('meta[name="description"]').attr("content") || null; description = soup('meta[name="description"]').attr("content") || null;
// Assuming the language is part of the URL as per the regex pattern // Assuming the language is part of the URL as per the regex pattern
const pattern = /([a-zA-Z]+-[A-Z]{2})/; language = soup('html').attr('lang') || null;
const match = pattern.exec(url);
language = match ? match[1] : null;
keywords = soup('meta[name="keywords"]').attr("content") || null; keywords = soup('meta[name="keywords"]').attr("content") || null;
robots = soup('meta[name="robots"]').attr("content") || null; robots = soup('meta[name="robots"]').attr("content") || null;


@ -41,10 +41,10 @@ export function extractLinks(html: string, baseUrl: string): string[] {
links.push(href); links.push(href);
} else if (href.startsWith('/')) { } else if (href.startsWith('/')) {
// Relative URL starting with '/', append to origin // Relative URL starting with '/', append to origin
links.push(`${origin}${href}`); links.push(new URL(href, baseUrl).href);
} else if (!href.startsWith('#') && !href.startsWith('mailto:')) { } else if (!href.startsWith('#') && !href.startsWith('mailto:')) {
// Relative URL not starting with '/', append to base URL // Relative URL not starting with '/', append to base URL
links.push(`${baseUrl}/${href}`); links.push(new URL(href, baseUrl).href);
} else if (href.startsWith('mailto:')) { } else if (href.startsWith('mailto:')) {
// mailto: links, add as is // mailto: links, add as is
links.push(href); links.push(href);


@ -0,0 +1,45 @@
import axios from "axios";
import dotenv from "dotenv";
import { SearchResult } from "../../src/lib/entities";
dotenv.config();
export async function fireEngineMap(q: string, options: {
tbs?: string;
filter?: string;
lang?: string;
country?: string;
location?: string;
numResults: number;
page?: number;
}): Promise<SearchResult[]> {
let data = JSON.stringify({
query: q,
lang: options.lang,
country: options.country,
location: options.location,
tbs: options.tbs,
numResults: options.numResults,
page: options.page ?? 1,
});
if (!process.env.FIRE_ENGINE_BETA_URL) {
console.warn("(v1/map Beta) Results might differ from cloud offering currently.");
return [];
}
let config = {
method: "POST",
url: `${process.env.FIRE_ENGINE_BETA_URL}/search`,
headers: {
"Content-Type": "application/json",
},
data: data,
};
const response = await axios(config);
if (response && response.data) {
return response.data;
} else {
return [];
}
}
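A brief usage sketch of the new fireEngineMap helper (the query and option values here are illustrative):
```typescript
import { fireEngineMap } from "./fireEngine";

async function demo() {
  // Illustrative call: the helper returns [] when FIRE_ENGINE_BETA_URL is not set.
  const results = await fireEngineMap("site:docs.example.com pricing", {
    lang: "en",
    country: "us",
    numResults: 10,
  });
  console.log(`Got ${results.length} results`);
}

demo().catch(console.error);
```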


@ -52,7 +52,7 @@ async function _req(term: string, results: number, lang: string, country: string
export async function google_search(term: string, advanced = false, num_results = 7, tbs = null, filter = null, lang = "en", country = "us", proxy = null, sleep_interval = 0, timeout = 5000, ) :Promise<SearchResult[]> { export async function googleSearch(term: string, advanced = false, num_results = 7, tbs = null, filter = null, lang = "en", country = "us", proxy = null, sleep_interval = 0, timeout = 5000, ) :Promise<SearchResult[]> {
let proxies = null; let proxies = null;
if (proxy) { if (proxy) {
if (proxy.startsWith("https")) { if (proxy.startsWith("https")) {


@ -1,11 +1,9 @@
import { Logger } from "../../src/lib/logger"; import { Logger } from "../../src/lib/logger";
import { SearchResult } from "../../src/lib/entities"; import { SearchResult } from "../../src/lib/entities";
import { google_search } from "./googlesearch"; import { googleSearch } from "./googlesearch";
import { fireEngineMap } from "./fireEngine";
import { serper_search } from "./serper"; import { serper_search } from "./serper";
export async function search({ export async function search({
query, query,
advanced = false, advanced = false,
@ -30,12 +28,20 @@ export async function search({
proxy?: string; proxy?: string;
sleep_interval?: number; sleep_interval?: number;
timeout?: number; timeout?: number;
}) : Promise<SearchResult[]> { }): Promise<SearchResult[]> {
try { try {
if (process.env.SERPER_API_KEY ) {
return await serper_search(query, {num_results, tbs, filter, lang, country, location}); if (process.env.SERPER_API_KEY) {
return await serper_search(query, {
num_results,
tbs,
filter,
lang,
country,
location,
});
} }
return await google_search( return await googleSearch(
query, query,
advanced, advanced,
num_results, num_results,
@ -49,7 +55,6 @@ export async function search({
); );
} catch (error) { } catch (error) {
Logger.error(`Error in search function: ${error}`); Logger.error(`Error in search function: ${error}`);
return [] return [];
} }
// if process.env.SERPER_API_KEY is set, use serper
} }


@ -1,5 +1,5 @@
import { Logger } from "../../../src/lib/logger"; import { Logger } from "../../../src/lib/logger";
import { getWebScraperQueue } from "../queue-service"; import { getScrapeQueue } from "../queue-service";
import { sendSlackWebhook } from "./slack"; import { sendSlackWebhook } from "./slack";
export async function checkAlerts() { export async function checkAlerts() {
@ -13,8 +13,8 @@ export async function checkAlerts() {
Logger.info("Initializing alerts"); Logger.info("Initializing alerts");
const checkActiveJobs = async () => { const checkActiveJobs = async () => {
try { try {
const webScraperQueue = getWebScraperQueue(); const scrapeQueue = getScrapeQueue();
const activeJobs = await webScraperQueue.getActiveCount(); const activeJobs = await scrapeQueue.getActiveCount();
if (activeJobs > Number(process.env.ALERT_NUM_ACTIVE_JOBS)) { if (activeJobs > Number(process.env.ALERT_NUM_ACTIVE_JOBS)) {
Logger.warn( Logger.warn(
`Alert: Number of active jobs is over ${process.env.ALERT_NUM_ACTIVE_JOBS}. Current active jobs: ${activeJobs}.` `Alert: Number of active jobs is over ${process.env.ALERT_NUM_ACTIVE_JOBS}. Current active jobs: ${activeJobs}.`
@ -34,11 +34,10 @@ export async function checkAlerts() {
}; };
const checkWaitingQueue = async () => { const checkWaitingQueue = async () => {
const webScraperQueue = getWebScraperQueue(); const scrapeQueue = getScrapeQueue();
const waitingJobs = await webScraperQueue.getWaitingCount(); const waitingJobs = await scrapeQueue.getWaitingCount();
const paused = await webScraperQueue.getPausedCount();
if (waitingJobs !== paused && waitingJobs > Number(process.env.ALERT_NUM_WAITING_JOBS)) { if (waitingJobs > Number(process.env.ALERT_NUM_WAITING_JOBS)) {
Logger.warn( Logger.warn(
`Alert: Number of waiting jobs is over ${process.env.ALERT_NUM_WAITING_JOBS}. Current waiting jobs: ${waitingJobs}.` `Alert: Number of waiting jobs is over ${process.env.ALERT_NUM_WAITING_JOBS}. Current waiting jobs: ${waitingJobs}.`
); );


@ -3,9 +3,9 @@ import { Logger } from "../../../src/lib/logger";
export async function sendSlackWebhook( export async function sendSlackWebhook(
message: string, message: string,
alertEveryone: boolean = false alertEveryone: boolean = false,
webhookUrl: string = process.env.SLACK_WEBHOOK_URL ?? ""
) { ) {
const webhookUrl = process.env.SLACK_WEBHOOK_URL;
const messagePrefix = alertEveryone ? "<!channel> " : ""; const messagePrefix = alertEveryone ? "<!channel> " : "";
const payload = { const payload = {
text: `${messagePrefix} ${message}`, text: `${messagePrefix} ${message}`,


@ -168,10 +168,11 @@ export async function supaBillTeam(team_id: string, credits: number) {
export async function checkTeamCredits(team_id: string, credits: number) { export async function checkTeamCredits(team_id: string, credits: number) {
return withAuth(supaCheckTeamCredits)(team_id, credits); return withAuth(supaCheckTeamCredits)(team_id, credits);
} }
// if team has enough credits for the operation, return true, else return false // if team has enough credits for the operation, return true, else return false
export async function supaCheckTeamCredits(team_id: string, credits: number) { export async function supaCheckTeamCredits(team_id: string, credits: number) {
if (team_id === "preview") { if (team_id === "preview") {
return { success: true, message: "Preview team, no credits used" }; return { success: true, message: "Preview team, no credits used", remainingCredits: Infinity };
} }
// Retrieve the team's active subscription and check for available coupons concurrently // Retrieve the team's active subscription and check for available coupons concurrently
@ -202,7 +203,7 @@ export async function supaCheckTeamCredits(team_id: string, credits: number) {
if (subscriptionError || !subscription) { if (subscriptionError || !subscription) {
// If there is no active subscription but there are available coupons // If there is no active subscription but there are available coupons
if (couponCredits >= credits) { if (couponCredits >= credits) {
return { success: true, message: "Sufficient credits available" }; return { success: true, message: "Sufficient credits available", remainingCredits: couponCredits };
} }
const { data: creditUsages, error: creditUsageError } = const { data: creditUsages, error: creditUsageError } =
@ -252,9 +253,10 @@ export async function supaCheckTeamCredits(team_id: string, credits: number) {
return { return {
success: false, success: false,
message: "Insufficient credits, please upgrade!", message: "Insufficient credits, please upgrade!",
remainingCredits: FREE_CREDITS - totalCreditsUsed
}; };
} }
return { success: true, message: "Sufficient credits available" }; return { success: true, message: "Sufficient credits available", remainingCredits: FREE_CREDITS - totalCreditsUsed };
} }
let totalCreditsUsed = 0; let totalCreditsUsed = 0;
@ -315,24 +317,24 @@ export async function supaCheckTeamCredits(team_id: string, credits: number) {
// Compare the adjusted total credits used with the credits allowed by the plan // Compare the adjusted total credits used with the credits allowed by the plan
if (adjustedCreditsUsed + credits > price.credits) { if (adjustedCreditsUsed + credits > price.credits) {
await sendNotification( // await sendNotification(
team_id, // team_id,
NotificationType.LIMIT_REACHED, // NotificationType.LIMIT_REACHED,
subscription.current_period_start, // subscription.current_period_start,
subscription.current_period_end // subscription.current_period_end
); // );
return { success: false, message: "Insufficient credits, please upgrade!" }; return { success: false, message: "Insufficient credits, please upgrade!", remainingCredits: creditLimit - adjustedCreditsUsed };
} else if (creditUsagePercentage >= 0.8) { } else if (creditUsagePercentage >= 0.8) {
// Send email notification for approaching credit limit // Send email notification for approaching credit limit
await sendNotification( // await sendNotification(
team_id, // team_id,
NotificationType.APPROACHING_LIMIT, // NotificationType.APPROACHING_LIMIT,
subscription.current_period_start, // subscription.current_period_start,
subscription.current_period_end // subscription.current_period_end
); // );
} }
return { success: true, message: "Sufficient credits available" }; return { success: true, message: "Sufficient credits available", remainingCredits: creditLimit - adjustedCreditsUsed };
} }
// Count the total credits used by a team within the current billing period and return the remaining credits. // Count the total credits used by a team within the current billing period and return the remaining credits.


@ -40,10 +40,11 @@ export async function logJob(job: FirecrawlJob) {
extractor_options: job.extractor_options, extractor_options: job.extractor_options,
num_tokens: job.num_tokens, num_tokens: job.num_tokens,
retry: !!job.retry, retry: !!job.retry,
crawl_id: job.crawl_id,
}, },
]); ]);
if (process.env.POSTHOG_API_KEY) { if (process.env.POSTHOG_API_KEY && !job.crawl_id) {
let phLog = { let phLog = {
distinctId: "from-api", //* To identify this on the group level, setting distinctid to a static string per posthog docs: https://posthog.com/docs/product-analytics/group-analytics#advanced-server-side-only-capturing-group-events-without-a-user distinctId: "from-api", //* To identify this on the group level, setting distinctid to a static string per posthog docs: https://posthog.com/docs/product-analytics/group-analytics#advanced-server-side-only-capturing-group-events-without-a-user
...(job.team_id !== "preview" && { ...(job.team_id !== "preview" && {


@ -1,17 +1,71 @@
import { Job, Queue } from "bull"; import { Job, Queue } from "bullmq";
import { import { getScrapeQueue } from "./queue-service";
getWebScraperQueue,
} from "./queue-service";
import { v4 as uuidv4 } from "uuid"; import { v4 as uuidv4 } from "uuid";
import { WebScraperOptions } from "../types"; import { WebScraperOptions } from "../types";
import * as Sentry from "@sentry/node";
export async function addWebScraperJob( async function addScrapeJobRaw(
webScraperOptions: WebScraperOptions, webScraperOptions: any,
options: any = {} options: any,
jobId: string,
jobPriority: number = 10
): Promise<Job> { ): Promise<Job> {
return await getWebScraperQueue().add(webScraperOptions, { return await getScrapeQueue().add(jobId, webScraperOptions, {
...options, ...options,
jobId: uuidv4(), priority: jobPriority,
jobId,
}); });
} }
export async function addScrapeJob(
webScraperOptions: WebScraperOptions,
options: any = {},
jobId: string = uuidv4(),
jobPriority: number = 10
): Promise<Job> {
if (Sentry.isInitialized()) {
const size = JSON.stringify(webScraperOptions).length;
return await Sentry.startSpan({
name: "Add scrape job",
op: "queue.publish",
attributes: {
"messaging.message.id": jobId,
"messaging.destination.name": getScrapeQueue().name,
"messaging.message.body.size": size,
},
}, async (span) => {
return await addScrapeJobRaw({
...webScraperOptions,
sentry: {
trace: Sentry.spanToTraceHeader(span),
baggage: Sentry.spanToBaggageHeader(span),
size,
},
}, options, jobId, jobPriority);
});
} else {
return await addScrapeJobRaw(webScraperOptions, options, jobId, jobPriority);
}
}
export function waitForJob(jobId: string, timeout: number) {
return new Promise((resolve, reject) => {
const start = Date.now();
const int = setInterval(async () => {
if (Date.now() >= start + timeout) {
clearInterval(int);
reject(new Error("Job wait timed out"));
} else {
const state = await getScrapeQueue().getJobState(jobId);
if (state === "completed") {
clearInterval(int);
resolve((await getScrapeQueue().getJob(jobId)).returnvalue);
} else if (state === "failed") {
clearInterval(int);
reject((await getScrapeQueue().getJob(jobId)).failedReason);
}
}
}, 1000);
})
}
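A hedged usage sketch of the new queue helpers (the payload shape here is illustrative only; the real WebScraperOptions has more fields than appear in this diff):
```typescript
import { v4 as uuidv4 } from "uuid";
import { addScrapeJob, waitForJob } from "./queue-jobs";

// Illustrative only: enqueue a scrape with an explicit job id and priority,
// then poll until the worker marks it completed or failed.
async function scrapeAndWait(url: string, teamId: string): Promise<unknown> {
  const jobId = uuidv4();
  await addScrapeJob({ url, team_id: teamId } as any, {}, jobId, 10);
  return await waitForJob(jobId, 60_000); // rejects on failure or timeout
}
```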


@ -1,23 +1,40 @@
import Queue from "bull"; import { Queue } from "bullmq";
import { Queue as BullQueue } from "bull";
import { Logger } from "../lib/logger"; import { Logger } from "../lib/logger";
import IORedis from "ioredis";
let webScraperQueue: BullQueue; let scrapeQueue: Queue;
export function getWebScraperQueue() { export const redisConnection = new IORedis(process.env.REDIS_URL, {
if (!webScraperQueue) { maxRetriesPerRequest: null,
webScraperQueue = new Queue("web-scraper", process.env.REDIS_URL, { });
settings: {
lockDuration: 1 * 60 * 1000, // 1 minute in milliseconds, export const scrapeQueueName = "{scrapeQueue}";
lockRenewTime: 15 * 1000, // 15 seconds in milliseconds
stalledInterval: 30 * 1000, export function getScrapeQueue() {
maxStalledCount: 10, if (!scrapeQueue) {
}, scrapeQueue = new Queue(
defaultJobOptions:{ scrapeQueueName,
attempts: 2 {
connection: redisConnection,
} }
}); // {
// settings: {
// lockDuration: 1 * 60 * 1000, // 1 minute in milliseconds,
// lockRenewTime: 15 * 1000, // 15 seconds in milliseconds
// stalledInterval: 30 * 1000,
// maxStalledCount: 10,
// },
// defaultJobOptions:{
// attempts: 5
// }
// }
);
Logger.info("Web scraper queue created"); Logger.info("Web scraper queue created");
} }
return webScraperQueue; return scrapeQueue;
} }
// === REMOVED IN FAVOR OF POLLING -- NOT RELIABLE
// import { QueueEvents } from 'bullmq';
// export const scrapeQueueEvents = new QueueEvents(scrapeQueueName, { connection: redisConnection.duplicate() });
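Since the queue moved from Bull to BullMQ, the connection is now an explicit IORedis instance with maxRetriesPerRequest: null (BullMQ workers require that setting), and the queue name is wrapped in braces, presumably so all of its keys hash to the same Redis Cluster slot. A minimal BullMQ producer/worker sketch under those assumptions (job name and payload are hypothetical):
```typescript
import { Queue, Worker } from "bullmq";
import IORedis from "ioredis";

const connection = new IORedis(process.env.REDIS_URL ?? "redis://localhost:6379", {
  maxRetriesPerRequest: null, // required for BullMQ workers
});

const queue = new Queue("{scrapeQueue}", { connection });

async function main() {
  // Producer: job name, payload, and options (priority, explicit jobId).
  await queue.add("example-scrape", { url: "https://example.com" }, { priority: 10, jobId: "example-1" });

  // Consumer: processes jobs from the same queue using the same connection settings.
  const worker = new Worker(
    "{scrapeQueue}",
    async (job) => {
      return { scraped: job.data.url };
    },
    { connection }
  );

  worker.on("completed", (job) => console.log(`Job ${job.id} done`));
}

main().catch(console.error);
```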


@ -1,74 +1,362 @@
import "dotenv/config";
import "./sentry";
import * as Sentry from "@sentry/node";
import { CustomError } from "../lib/custom-error";
import {
getScrapeQueue,
redisConnection,
scrapeQueueName,
} from "./queue-service";
import { logtail } from "./logtail";
import { startWebScraperPipeline } from "../main/runWebScraper";
import { callWebhook } from "./webhook";
import { logJob } from "./logging/log_job";
import { initSDK } from "@hyperdx/node-opentelemetry";
import { Job } from "bullmq";
import { Logger } from "../lib/logger";
import { Worker } from "bullmq";
import systemMonitor from "./system-monitor";
import { v4 as uuidv4 } from "uuid";
import { addCrawlJob, addCrawlJobDone, crawlToCrawler, finishCrawl, getCrawl, getCrawlJobs, lockURL } from "../lib/crawl-redis";
import { StoredCrawl } from "../lib/crawl-redis";
import { addScrapeJob } from "./queue-jobs";
import { supabaseGetJobById } from "../../src/lib/supabase-jobs";
import { addJobPriority, deleteJobPriority, getJobPriority } from "../../src/lib/job-priority";
import { PlanType } from "../types";

if (process.env.ENV === "production") {
initSDK({
consoleCapture: true,
additionalInstrumentations: [],
});
}
const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
const workerLockDuration = Number(process.env.WORKER_LOCK_DURATION) || 60000;
const workerStalledCheckInterval =
Number(process.env.WORKER_STALLED_CHECK_INTERVAL) || 30000;
const jobLockExtendInterval =
Number(process.env.JOB_LOCK_EXTEND_INTERVAL) || 15000;
const jobLockExtensionTime =
Number(process.env.JOB_LOCK_EXTENSION_TIME) || 60000;
const cantAcceptConnectionInterval =
Number(process.env.CANT_ACCEPT_CONNECTION_INTERVAL) || 2000;
const connectionMonitorInterval =
Number(process.env.CONNECTION_MONITOR_INTERVAL) || 10;
const gotJobInterval = Number(process.env.CONNECTION_MONITOR_INTERVAL) || 20;
const processJobInternal = async (token: string, job: Job) => {
const extendLockInterval = setInterval(async () => {
Logger.info(`🐂 Worker extending lock on job ${job.id}`);
await job.extendLock(token, jobLockExtensionTime);
}, jobLockExtendInterval);
await addJobPriority(job.data.team_id, job.id );
let err = null;
try {
const result = await processJob(job, token);
try{
if (job.data.crawl_id && process.env.USE_DB_AUTHENTICATION === "true") {
await job.moveToCompleted(null, token, false);
} else {
await job.moveToCompleted(result.docs, token, false);
}
}catch(e){
}
} catch (error) {
console.log("Job failed, error:", error);
Sentry.captureException(error);
err = error;
await job.moveToFailed(error, token, false);
} finally {
await deleteJobPriority(job.data.team_id, job.id );
clearInterval(extendLockInterval);
}
return err;
};
let isShuttingDown = false;
process.on("SIGINT", () => {
console.log("Received SIGINT. Shutting down gracefully...");
isShuttingDown = true;
});
const workerFun = async (queueName: string, processJobInternal: (token: string, job: Job) => Promise<any>) => {
const worker = new Worker(queueName, null, {
connection: redisConnection,
lockDuration: 1 * 60 * 1000, // 1 minute
// lockRenewTime: 15 * 1000, // 15 seconds
stalledInterval: 30 * 1000, // 30 seconds
maxStalledCount: 10, // 10 times
});
worker.startStalledCheckTimer();
const monitor = await systemMonitor;
while (true) {
if (isShuttingDown) {
console.log("No longer accepting new jobs. SIGINT");
break;
}
const token = uuidv4();
const canAcceptConnection = await monitor.acceptConnection();
if (!canAcceptConnection) {
console.log("Cant accept connection");
await sleep(cantAcceptConnectionInterval); // more sleep
continue;
}
const job = await worker.getNextJob(token);
if (job) {
if (job.data && job.data.sentry && Sentry.isInitialized()) {
Sentry.continueTrace({ sentryTrace: job.data.sentry.trace, baggage: job.data.sentry.baggage }, () => {
Sentry.startSpan({
name: "Scrape job",
attributes: {
job: job.id,
worker: process.env.FLY_MACHINE_ID ?? worker.id,
},
}, async (span) => {
await Sentry.startSpan({
name: "Process scrape job",
op: "queue.process",
attributes: {
"messaging.message.id": job.id,
"messaging.destination.name": getScrapeQueue().name,
"messaging.message.body.size": job.data.sentry.size,
"messaging.message.receive.latency": Date.now() - (job.processedOn ?? job.timestamp),
"messaging.message.retry.count": job.attemptsMade,
}
}, async () => {
const res = await processJobInternal(token, job);
if (res !== null) {
span.setStatus({ code: 2 }); // ERROR
} else {
span.setStatus({ code: 1 }); // OK
}
});
});
});
} else {
Sentry.startSpan({
name: "Scrape job",
attributes: {
job: job.id,
worker: process.env.FLY_MACHINE_ID ?? worker.id,
},
}, () => {
processJobInternal(token, job);
});
}
await sleep(gotJobInterval);
} else {
await sleep(connectionMonitorInterval);
}
}
};
workerFun(scrapeQueueName, processJobInternal);
async function processJob(job: Job, token: string) {
Logger.info(`🐂 Worker taking job ${job.id}`);
// Check if the job URL is researchhub and block it immediately
// TODO: remove this once solve the root issue
if (job.data.url && (job.data.url.includes("researchhub.com") || job.data.url.includes("ebay.com") || job.data.url.includes("youtube.com") || job.data.url.includes("microsoft.com") )) {
Logger.info(`🐂 Blocking job ${job.id} with URL ${job.data.url}`);
const data = {
success: false,
docs: [],
project_id: job.data.project_id,
error: "URL is blocked. Suspecious activity detected. Please contact hello@firecrawl.com if you believe this is an error.",
};
await job.moveToCompleted(data.docs, token, false);
return data;
}
try {
job.updateProgress({
current: 1,
total: 100,
current_step: "SCRAPING",
current_url: "",
});

const start = Date.now();

const { success, message, docs } = await startWebScraperPipeline({
job,
token,
});

const end = Date.now();
const timeTakenInSeconds = (end - start) / 1000;
const rawHtml = docs[0] ? docs[0].rawHtml : "";
if (job.data.crawl_id && (!job.data.pageOptions || !job.data.pageOptions.includeRawHtml)) {
if (docs[0] && docs[0].rawHtml) {
delete docs[0].rawHtml;
}
}
const data = {
success,
result: {
links: docs.map((doc) => {
return {
content: doc,
source: doc?.metadata?.sourceURL ?? doc?.url ?? "",
};
}),
},
project_id: job.data.project_id,
error: message /* etc... */,
docs,
};

if (job.data.mode === "crawl") {
await callWebhook(job.data.team_id, job.id as string, data, job.data.webhook, job.data.v1);
}
if (job.data.crawl_id) {
await logJob({
job_id: job.id as string,
success: success,
message: message,
num_docs: docs.length,
docs: docs,
time_taken: timeTakenInSeconds,
team_id: job.data.team_id,
mode: job.data.mode,
url: job.data.url,
crawlerOptions: job.data.crawlerOptions,
pageOptions: job.data.pageOptions,
origin: job.data.origin,
crawl_id: job.data.crawl_id,
});
await addCrawlJobDone(job.data.crawl_id, job.id);
const sc = await getCrawl(job.data.crawl_id) as StoredCrawl;
if (!job.data.sitemapped) {
if (!sc.cancelled) {
const crawler = crawlToCrawler(job.data.crawl_id, sc);
const links = crawler.filterLinks(
crawler.extractLinksFromHTML(rawHtml ?? "", sc.originUrl),
Infinity,
sc.crawlerOptions?.maxDepth ?? 10
)
for (const link of links) {
if (await lockURL(job.data.crawl_id, sc, link)) {
// This seems to work really well
const jobPriority = await getJobPriority({plan:sc.plan as PlanType, team_id: sc.team_id, basePriority: job.data.crawl_id ? 20 : 10})
const jobId = uuidv4();
// console.log("plan: ", sc.plan);
// console.log("team_id: ", sc.team_id)
// console.log("base priority: ", job.data.crawl_id ? 20 : 10)
// console.log("job priority: " , jobPriority, "\n\n\n")
const newJob = await addScrapeJob({
url: link,
mode: "single_urls",
crawlerOptions: sc.crawlerOptions,
team_id: sc.team_id,
pageOptions: sc.pageOptions,
origin: job.data.origin,
crawl_id: job.data.crawl_id,
v1: job.data.v1,
}, {}, jobId, jobPriority);
await addCrawlJob(job.data.crawl_id, newJob.id);
}
}
}
}
if (await finishCrawl(job.data.crawl_id)) {
const jobIDs = await getCrawlJobs(job.data.crawl_id);
const jobs = (await Promise.all(jobIDs.map(async x => {
if (x === job.id) {
return {
async getState() {
return "completed"
},
timestamp: Date.now(),
returnvalue: docs,
}
}
const j = await getScrapeQueue().getJob(x);
if (process.env.USE_DB_AUTHENTICATION === "true") {
const supabaseData = await supabaseGetJobById(j.id);
if (supabaseData) {
j.returnvalue = supabaseData.docs;
}
}
return j;
}))).sort((a, b) => a.timestamp - b.timestamp);
const jobStatuses = await Promise.all(jobs.map(x => x.getState()));
const jobStatus = sc.cancelled || jobStatuses.some(x => x === "failed") ? "failed" : "completed";
const fullDocs = jobs.map(x => Array.isArray(x.returnvalue) ? x.returnvalue[0] : x.returnvalue);
await logJob({
job_id: job.data.crawl_id,
success: jobStatus === "completed",
message: sc.cancelled ? "Cancelled" : message,
num_docs: fullDocs.length,
docs: [],
time_taken: (Date.now() - sc.createdAt) / 1000,
team_id: job.data.team_id,
mode: "crawl",
url: sc.originUrl,
crawlerOptions: sc.crawlerOptions,
pageOptions: sc.pageOptions,
origin: job.data.origin,
});
const data = {
success: jobStatus !== "failed",
result: {
links: fullDocs.map((doc) => {
return {
content: doc,
source: doc?.metadata?.sourceURL ?? doc?.url ?? "",
};
}),
},
project_id: job.data.project_id,
error: message /* etc... */,
docs: fullDocs,
};
await callWebhook(job.data.team_id, job.data.crawl_id, data, job.data.webhook, job.data.v1);
}
}
await logJob({
job_id: job.id as string,
success: success,
message: message,
num_docs: docs.length,
docs: docs,
time_taken: timeTakenInSeconds,
team_id: job.data.team_id,
mode: "crawl",
url: job.data.url,
crawlerOptions: job.data.crawlerOptions,
pageOptions: job.data.pageOptions,
origin: job.data.origin,
});
Logger.info(`🐂 Job done ${job.id}`);
return data;
} catch (error) {
Logger.error(`🐂 Job errored ${job.id} - ${error}`);

Sentry.captureException(error, {
data: {
job: job.id
},
});

if (error instanceof CustomError) {
// Here we handle the error, then save the failed job
@@ -81,6 +369,9 @@ async function processJob(job: Job, done) {
});
}

Logger.error(error);
if (error.stack) {
Logger.error(error.stack);
}
logtail.error("Overall error ingesting", { logtail.error("Overall error ingesting", {
job_id: job.id, job_id: job.id,
@ -89,37 +380,69 @@ async function processJob(job: Job, done) {
const data = {
success: false,
docs: [],
project_id: job.data.project_id,
error:
"Something went wrong... Contact help@mendable.ai or try again." /* etc... */,
};

if (job.data.mode === "crawl" || job.data.crawl_id) {
await callWebhook(job.data.team_id, job.data.crawl_id ?? job.id as string, data, job.data.webhook, job.data.v1);
}

if (job.data.crawl_id) {
await logJob({
job_id: job.id as string,
success: false,
message:
typeof error === "string"
? error
: error.message ?? "Something went wrong... Contact help@mendable.ai",
num_docs: 0,
docs: [],
time_taken: 0,
team_id: job.data.team_id,
mode: job.data.mode,
url: job.data.url,
crawlerOptions: job.data.crawlerOptions,
pageOptions: job.data.pageOptions,
origin: job.data.origin,
crawl_id: job.data.crawl_id,
});
const sc = await getCrawl(job.data.crawl_id);
await logJob({
job_id: job.data.crawl_id,
success: false,
message:
typeof error === "string"
? error
: error.message ?? "Something went wrong... Contact help@mendable.ai",
num_docs: 0,
docs: [],
time_taken: 0,
team_id: job.data.team_id,
mode: "crawl",
url: sc ? sc.originUrl : job.data.url,
crawlerOptions: sc ? sc.crawlerOptions : job.data.crawlerOptions,
pageOptions: sc ? sc.pageOptions : job.data.pageOptions,
origin: job.data.origin,
});
}
// done(null, data);
return data;
}
}
// wsq.process(
//   Math.floor(Number(process.env.NUM_WORKERS_PER_QUEUE ?? 8)),
//   processJob
// );

// wsq.on("waiting", j => ScrapeEvents.logJobEvent(j, "waiting"));
// wsq.on("active", j => ScrapeEvents.logJobEvent(j, "active"));
// wsq.on("completed", j => ScrapeEvents.logJobEvent(j, "completed"));
// wsq.on("paused", j => ScrapeEvents.logJobEvent(j, "paused"));
// wsq.on("resumed", j => ScrapeEvents.logJobEvent(j, "resumed"));
// wsq.on("removed", j => ScrapeEvents.logJobEvent(j, "removed"));

View File

@@ -65,7 +65,7 @@ describe("Rate Limiter Service", () => {
"test-prefix:someToken",
"standard"
);
expect(limiter2.points).toBe(100);

const limiter3 = getRateLimiter(
"search" as RateLimiterMode,

@@ -79,7 +79,7 @@ describe("Rate Limiter Service", () => {
"test-prefix:someToken",
"growth"
);
expect(limiter4.points).toBe(250);
});

it("should return the default rate limiter if plan is not provided", () => {

@@ -153,7 +153,7 @@ describe("Rate Limiter Service", () => {
"crawlStatus" as RateLimiterMode,
"test-prefix:someToken"
);
expect(limiter2.points).toBe(250);
});

it("should consume points correctly for 'crawl' mode", async () => {

@@ -188,14 +188,13 @@ describe("Rate Limiter Service", () => {
"test-prefix:someTokenXY",
"hobby"
);
expect(limiter.points).toBe(20);

const consumePoints = 5;
const res = await limiter.consume("test-prefix:someTokenXY", consumePoints);
expect(res.consumedPoints).toBe(5);
expect(res.remainingPoints).toBe(15);
});

it("should return the correct rate limiter for 'crawl' mode", () => {

@@ -227,7 +226,7 @@ describe("Rate Limiter Service", () => {
"test-prefix:someToken",
"free"
);
expect(limiter.points).toBe(10);

const limiter2 = getRateLimiter(
"scrape" as RateLimiterMode,

@@ -241,7 +240,14 @@ describe("Rate Limiter Service", () => {
"test-prefix:someToken",
"standard"
);
expect(limiter3.points).toBe(100);

const limiter4 = getRateLimiter(
"scrape" as RateLimiterMode,
"test-prefix:someToken",
"growth"
);
expect(limiter4.points).toBe(1000);
});

it("should return the correct rate limiter for 'search' mode", () => {

@@ -309,7 +315,7 @@ describe("Rate Limiter Service", () => {
"crawlStatus" as RateLimiterMode,
"test-prefix:someToken"
);
expect(limiter2.points).toBe(250);
});

it("should return the correct rate limiter for 'testSuite' mode", () => {

View File

@@ -14,18 +14,20 @@ const RATE_LIMITS = {
standardNew: 10,
standardnew: 10,
growth: 50,
growthdouble: 50,
},
scrape: {
default: 20,
free: 10,
starter: 20,
standard: 100,
standardOld: 40,
scale: 500,
hobby: 20,
standardNew: 100,
standardnew: 100,
growth: 1000,
growthdouble: 1000,
},
search: {
default: 20,

@@ -38,6 +40,20 @@ const RATE_LIMITS = {
standardNew: 50,
standardnew: 50,
growth: 500,
growthdouble: 500,
},
map: {
default: 20,
free: 5,
starter: 20,
standard: 40,
standardOld: 40,
scale: 500,
hobby: 10,
standardNew: 50,
standardnew: 50,
growth: 500,
growthdouble: 500,
},
preview: {
free: 5,

@@ -49,7 +65,7 @@ const RATE_LIMITS = {
},
crawlStatus: {
free: 150,
default: 250,
},
testSuite: {
free: 10000,

@@ -81,16 +97,28 @@ export const testSuiteRateLimiter = new RateLimiterRedis({
duration: 60, // Duration in seconds
});

export const devBRateLimiter = new RateLimiterRedis({
storeClient: redisRateLimitClient,
keyPrefix: "dev-b",
points: 1200,
duration: 60, // Duration in seconds
});

export function getRateLimiter(
mode: RateLimiterMode,
token: string,
plan?: string,
teamId?: string
) {
if (token.includes("a01ccae") || token.includes("6254cf9") || token.includes("0f96e673") || token.includes("23befa1b")) {
return testSuiteRateLimiter;
}

if (teamId && teamId === process.env.DEV_B_TEAM_ID) {
return devBRateLimiter;
}

const rateLimitConfig = RATE_LIMITS[mode]; // {default : 5}
if (!rateLimitConfig) return serverRateLimiter;
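
A consumption sketch for the limiter returned above, assuming rate-limiter-flexible's consume/RateLimiterRes API and a sibling module path; the token and point cost are illustrative:

// Sketch: consume one point per request and surface a retry-after hint when exhausted.
import { RateLimiterRes } from "rate-limiter-flexible";
import { getRateLimiter } from "./rate-limiter";
import { RateLimiterMode } from "../types";

export async function checkScrapeLimit(token: string, plan?: string) {
  const limiter = getRateLimiter("scrape" as RateLimiterMode, token, plan);
  try {
    await limiter.consume(token, 1);
    return { allowed: true };
  } catch (err) {
    // rate-limiter-flexible rejects with a RateLimiterRes when the budget is exhausted
    const res = err as RateLimiterRes;
    return { allowed: false, retryAfterMs: res.msBeforeNext };
  }
}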

View File

@@ -0,0 +1,18 @@
// Import with `import * as Sentry from "@sentry/node"` if you are using ESM
import * as Sentry from "@sentry/node";
import { nodeProfilingIntegration } from "@sentry/profiling-node";
import { Logger } from "../lib/logger";
if (process.env.SENTRY_DSN) {
Logger.info("Setting up Sentry...");
Sentry.init({
dsn: process.env.SENTRY_DSN,
integrations: [
nodeProfilingIntegration(),
],
tracesSampleRate: process.env.SENTRY_ENVIRONMENT === "dev" ? 1.0 : 0.045,
profilesSampleRate: 1.0,
serverName: process.env.FLY_MACHINE_ID,
environment: process.env.SENTRY_ENVIRONMENT ?? "production",
});
}
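
Importing this module for its side effects (as the worker does with import "./sentry") initializes the SDK once per process; afterwards spans and exceptions can be reported from anywhere. A minimal sketch, assuming the startSpan API of the @sentry/node version used here; the names are illustrative:

// Sketch: wrap a unit of work in a span and report failures.
import * as Sentry from "@sentry/node";

export async function instrumented<T>(name: string, fn: () => Promise<T>): Promise<T> {
  return await Sentry.startSpan({ name, op: "task" }, async () => {
    try {
      return await fn();
    } catch (error) {
      Sentry.captureException(error);
      throw error;
    }
  });
}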

View File

@@ -0,0 +1,81 @@
import si from 'systeminformation';
import { Mutex } from "async-mutex";
const MAX_CPU = process.env.MAX_CPU ? parseFloat(process.env.MAX_CPU) : 0.8;
const MAX_RAM = process.env.MAX_RAM ? parseFloat(process.env.MAX_RAM) : 0.8;
const CACHE_DURATION = process.env.SYS_INFO_MAX_CACHE_DURATION ? parseFloat(process.env.SYS_INFO_MAX_CACHE_DURATION) : 150;
class SystemMonitor {
private static instance: SystemMonitor;
private static instanceMutex = new Mutex();
private cpuUsageCache: number | null = null;
private memoryUsageCache: number | null = null;
private lastCpuCheck: number = 0;
private lastMemoryCheck: number = 0;
private constructor() {}
public static async getInstance(): Promise<SystemMonitor> {
if (SystemMonitor.instance) {
return SystemMonitor.instance;
}
await this.instanceMutex.runExclusive(async () => {
if (!SystemMonitor.instance) {
SystemMonitor.instance = new SystemMonitor();
}
});
return SystemMonitor.instance;
}
private async checkMemoryUsage() {
const now = Date.now();
if (this.memoryUsageCache !== null && (now - this.lastMemoryCheck) < CACHE_DURATION) {
return this.memoryUsageCache;
}
const memoryData = await si.mem();
const totalMemory = memoryData.total;
const availableMemory = memoryData.available;
const usedMemory = totalMemory - availableMemory;
const usedMemoryPercentage = (usedMemory / totalMemory);
this.memoryUsageCache = usedMemoryPercentage;
this.lastMemoryCheck = now;
return usedMemoryPercentage;
}
private async checkCpuUsage() {
const now = Date.now();
if (this.cpuUsageCache !== null && (now - this.lastCpuCheck) < CACHE_DURATION) {
return this.cpuUsageCache;
}
const cpuData = await si.currentLoad();
const cpuLoad = cpuData.currentLoad / 100;
this.cpuUsageCache = cpuLoad;
this.lastCpuCheck = now;
return cpuLoad;
}
public async acceptConnection() {
const cpuUsage = await this.checkCpuUsage();
const memoryUsage = await this.checkMemoryUsage();
return cpuUsage < MAX_CPU && memoryUsage < MAX_RAM;
}
public clearCache() {
this.cpuUsageCache = null;
this.memoryUsageCache = null;
this.lastCpuCheck = 0;
this.lastMemoryCheck = 0;
}
}
export default SystemMonitor.getInstance();
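
A usage sketch of the admission check, mirroring how the worker loop consumes the exported promise; the back-off delay is illustrative:

// Sketch: wait until CPU and memory are below the configured ceilings before taking work.
import systemMonitor from "./system-monitor";

const wait = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));

export async function whenCapacityAvailable() {
  const monitor = await systemMonitor; // the default export is a promise of the singleton
  while (!(await monitor.acceptConnection())) {
    await wait(2000);
  }
}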

View File

@@ -1,15 +1,16 @@
import { legacyDocumentConverter } from "../../src/controllers/v1/types";
import { Logger } from "../../src/lib/logger";
import { supabase_service } from "./supabase";

export const callWebhook = async (teamId: string, jobId: string, data: any, specified?: string, v1 = false) => {
try {
const selfHostedUrl = process.env.SELF_HOSTED_WEBHOOK_URL?.replace("{{JOB_ID}}", jobId);
const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
let webhookUrl = specified ?? selfHostedUrl;

// Only fetch the webhook URL from the database if the self-hosted webhook URL and specified webhook are not set
// and the USE_DB_AUTHENTICATION environment variable is set to true
if (!webhookUrl && useDbAuthentication) {
const { data: webhooksData, error } = await supabase_service
.from("webhooks")
.select("url")

@@ -30,11 +31,15 @@ export const callWebhook = async (teamId: string, jobId: string,data: any) => {
let dataToSend = [];
if (data.result.links && data.result.links.length !== 0) {
for (let i = 0; i < data.result.links.length; i++) {
if (v1) {
dataToSend.push(legacyDocumentConverter(data.result.links[i].content))
} else {
dataToSend.push({
content: data.result.links[i].content.content,
markdown: data.result.links[i].content.markdown,
metadata: data.result.links[i].content.metadata,
});
}
}
}
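
For self-hosted deployments, the URL in SELF_HOSTED_WEBHOOK_URL (or the specified URL passed by the caller) receives these payloads. A receiver sketch, assuming an Express app; the route, port, and body handling are illustrative, and the payload shape is whatever this function ends up POSTing (assembled further down, outside this hunk):

// Sketch: a self-hosted endpoint that accepts Firecrawl webhook calls.
// The {{JOB_ID}} placeholder in SELF_HOSTED_WEBHOOK_URL is substituted per job, e.g.
// SELF_HOSTED_WEBHOOK_URL=https://example.internal/webhooks/firecrawl/{{JOB_ID}}
import express from "express";

const app = express();
app.use(express.json({ limit: "10mb" }));

app.post("/webhooks/firecrawl/:jobId", (req, res) => {
  console.log("webhook for job", req.params.jobId, "keys:", Object.keys(req.body ?? {}));
  res.sendStatus(200); // acknowledge quickly; do heavy processing asynchronously
});

app.listen(3002);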

View File

@@ -25,8 +25,14 @@ export interface WebScraperOptions {
mode: Mode;
crawlerOptions: any;
pageOptions: any;
extractorOptions?: any;
team_id: string;
origin?: string;
crawl_id?: string;
sitemapped?: boolean;
webhook?: string;
v1?: boolean;
is_scrape?: boolean;
}

export interface RunWebScraperParams {

@@ -34,11 +40,14 @@ export interface RunWebScraperParams {
mode: Mode;
crawlerOptions: any;
pageOptions?: any;
extractorOptions?: any;
inProgress: (progress: any) => void;
onSuccess: (result: any, mode: string) => void;
onError: (error: Error) => void;
team_id: string;
bull_job_id: string;
priority?: number;
is_scrape?: boolean;
}

export interface RunWebScraperResult {

@@ -63,6 +72,7 @@ export interface FirecrawlJob {
extractor_options?: ExtractorOptions,
num_tokens?: number,
retry?: boolean,
crawl_id?: string;
}

export interface FirecrawlScrapeResponse {

@@ -99,6 +109,7 @@ export enum RateLimiterMode {
Scrape = "scrape",
Preview = "preview",
Search = "search",
Map = "map",
}

@@ -107,7 +118,8 @@ export interface AuthResponse {
team_id?: string;
error?: string;
status?: number;
api_key?: string;
plan?: PlanType;
}

@@ -130,4 +142,15 @@ export type ScrapeLog = {
html?: string;
ipv4_support?: boolean | null;
ipv6_support?: boolean | null;
};
export type PlanType =
| "starter"
| "standard"
| "scale"
| "hobby"
| "standardnew"
| "growth"
| "growthdouble"
| "free"
| "";

View File

@@ -2,16 +2,22 @@
"compilerOptions": { "compilerOptions": {
"rootDir": "./src", "rootDir": "./src",
"lib": ["es6","DOM"], "lib": ["es6","DOM"],
"target": "ES2020", // or higher
// or higher
"target": "ES2020",
"module": "commonjs", "module": "commonjs",
"esModuleInterop": true, "esModuleInterop": true,
"sourceMap": true, "sourceMap": true,
"outDir": "./dist/src", "outDir": "./dist/src",
"moduleResolution": "node", "moduleResolution": "node",
"baseUrl": ".", "baseUrl": ".",
"paths": { "paths": {
"*": ["node_modules/*", "src/types/*"], "*": ["node_modules/*", "src/types/*"],
} },
"inlineSources": true
}, },
"include": ["src/","src/**/*", "services/db/supabase.ts", "utils/utils.ts", "services/db/supabaseEmbeddings.ts", "utils/EventEmmitter.ts", "src/services/queue-service.ts"] "include": ["src/","src/**/*", "services/db/supabase.ts", "utils/utils.ts", "services/db/supabaseEmbeddings.ts", "utils/EventEmmitter.ts", "src/services/queue-service.ts"]
} }

Some files were not shown because too many files have changed in this diff.