diff --git a/.github/scripts/check_version_has_incremented.py b/.github/scripts/check_version_has_incremented.py index 345d0882..e437c934 100644 --- a/.github/scripts/check_version_has_incremented.py +++ b/.github/scripts/check_version_has_incremented.py @@ -1,24 +1,14 @@ """ -checks local verions against published verions. +checks local versions against published versions. # Usage: -Unix: python .github/scripts/check_version_has_incremented.py js ./apps/js-sdk/firecrawl @mendable/firecrawl-js - -Windows: -python .github\scripts\check_version_has_incremented.py js .\apps\js-sdk\firecrawl @mendable/firecrawl-js - Local version: 0.0.22 Published version: 0.0.21 true -Unix: python .github/scripts/check_version_has_incremented.py python ./apps/python-sdk/firecrawl firecrawl-py - -Windows: -python .github\scripts\check_version_has_incremented.py python .\apps\python-sdk\firecrawl firecrawl-py - Local version: 0.0.11 Published version: 0.0.11 false @@ -88,8 +78,8 @@ if __name__ == "__main__": raise ValueError("Invalid package type. 
Use 'python' or 'js'.") # Print versions for debugging - print(f"Local version: {current_version}") - print(f"Published version: {published_version}") + # print(f"Local version: {current_version}") + # print(f"Published version: {published_version}") # Compare versions and print result if is_version_incremented(current_version, published_version): diff --git a/.github/workflows/fly.yml b/.github/workflows/fly.yml index 3950c2ee..957a707e 100644 --- a/.github/workflows/fly.yml +++ b/.github/workflows/fly.yml @@ -3,8 +3,6 @@ on: push: branches: - main - schedule: - - cron: '0 */2 * * *' env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} @@ -25,9 +23,12 @@ env: SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }} SUPABASE_URL: ${{ secrets.SUPABASE_URL }} TEST_API_KEY: ${{ secrets.TEST_API_KEY }} + PYPI_USERNAME: ${{ secrets.PYPI_USERNAME }} + PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + NPM_TOKEN: ${{ secrets.NPM_TOKEN }} jobs: - pre-deploy: + pre-deploy-e2e-tests: name: Pre-deploy checks runs-on: ubuntu-latest services: @@ -61,7 +62,7 @@ jobs: pre-deploy-test-suite: name: Test Suite - needs: pre-deploy + needs: pre-deploy-e2e-tests runs-on: ubuntu-latest services: redis: @@ -94,19 +95,37 @@ jobs: run: | npm run test working-directory: ./apps/test-suite - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Install Python dependencies - run: | - python -m pip install --upgrade pip + + python-sdk-tests: + name: Python SDK Tests + needs: pre-deploy-e2e-tests + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.x' + - name: Install Python dependencies + run: | + python -m pip install --upgrade pip pip install -r requirements.txt working-directory: ./apps/python-sdk - name: Run E2E tests for Python SDK - run: | - pytest firecrawl/__tests__/e2e_withAuth/test.py + run: | 
+ pytest firecrawl/__tests__/e2e_withAuth/test.py working-directory: ./apps/python-sdk + + js-sdk-tests: + name: JavaScript SDK Tests + needs: pre-deploy-e2e-tests + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Node.js + uses: actions/setup-node@v3 + with: + node-version: "20" - name: Install dependencies for JavaScript SDK run: pnpm install working-directory: ./apps/js-sdk/firecrawl @@ -117,7 +136,7 @@ jobs: deploy: name: Deploy app runs-on: ubuntu-latest - needs: pre-deploy-test-suite + needs: [pre-deploy-test-suite, python-sdk-tests, js-sdk-tests] steps: - uses: actions/checkout@v3 - name: Change directory @@ -126,3 +145,83 @@ jobs: - run: flyctl deploy ./apps/api --remote-only -a firecrawl-scraper-js env: FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }} + + build-and-publish-python-sdk: + runs-on: ubuntu-latest + needs: deploy + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.x' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install setuptools wheel twine build requests packaging + + - name: Run version check script + id: version_check_script + run: | + PYTHON_SDK_VERSION_INCREMENTED=$(python .github/scripts/check_version_has_incremented.py python ./apps/python-sdk/firecrawl firecrawl-py) + echo "PYTHON_SDK_VERSION_INCREMENTED=$PYTHON_SDK_VERSION_INCREMENTED" >> $GITHUB_ENV + + - name: Build the package + if: ${{ env.PYTHON_SDK_VERSION_INCREMENTED == 'true' }} + run: | + python -m build + working-directory: ./apps/python-sdk + + - name: Publish to PyPI + if: ${{ env.PYTHON_SDK_VERSION_INCREMENTED == 'true' }} + env: + TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} + TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + run: | + twine upload dist/* + working-directory: ./apps/python-sdk + + build-and-publish-js-sdk: + runs-on: ubuntu-latest + needs: deploy + + steps: + - uses: actions/checkout@v3 + - name: Set 
up Node.js + uses: actions/setup-node@v3 + with: + node-version: '20' + registry-url: 'https://registry.npmjs.org/' + scope: '@mendable' + always-auth: true + + - name: Install pnpm + run: npm install -g pnpm + + - name: Install python for running version check script + run: | + python -m pip install --upgrade pip + pip install setuptools wheel requests packaging + + - name: Install dependencies for JavaScript SDK + run: pnpm install + working-directory: ./apps/js-sdk/firecrawl + + - name: Run version check script + id: version_check_script + run: | + VERSION_INCREMENTED=$(python .github/scripts/check_version_has_incremented.py js ./apps/js-sdk/firecrawl @mendable/firecrawl-js) + echo "VERSION_INCREMENTED=$VERSION_INCREMENTED" >> $GITHUB_ENV + + - name: Build and publish to npm + if: ${{ env.VERSION_INCREMENTED == 'true' }} + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + run: | + npm run build-and-publish + working-directory: ./apps/js-sdk/firecrawl + \ No newline at end of file diff --git a/.github/workflows/js-sdk.yml b/.github/workflows/js-sdk.yml index 3c914cc8..c84bb8b1 100644 --- a/.github/workflows/js-sdk.yml +++ b/.github/workflows/js-sdk.yml @@ -1,9 +1,7 @@ name: Run JavaScript SDK E2E Tests -on: - pull_request: - branches: - - main +on: [] + env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }} diff --git a/.github/workflows/publish-js-sdk.yml b/.github/workflows/publish-js-sdk.yml new file mode 100644 index 00000000..c02a654f --- /dev/null +++ b/.github/workflows/publish-js-sdk.yml @@ -0,0 +1,46 @@ +name: Publish JavaScript SDK + +on: [] + +env: + NPM_TOKEN: ${{ secrets.NPM_TOKEN }} + +jobs: + build-and-publish: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Set up Node.js + uses: actions/setup-node@v3 + with: + node-version: '20' + registry-url: 'https://registry.npmjs.org/' + scope: '@mendable' + always-auth: true + + - name: Install pnpm + run: npm install -g pnpm + + - 
name: Install python for running version check script + run: | + python -m pip install --upgrade pip + pip install setuptools wheel requests packaging + + - name: Install dependencies for JavaScript SDK + run: pnpm install + working-directory: ./apps/js-sdk/firecrawl + + - name: Run version check script + id: version_check_script + run: | + VERSION_INCREMENTED=$(python .github/scripts/check_version_has_incremented.py js ./apps/js-sdk/firecrawl @mendable/firecrawl-js) + echo "VERSION_INCREMENTED=$VERSION_INCREMENTED" >> $GITHUB_ENV + + - name: Build and publish to npm + if: ${{ env.VERSION_INCREMENTED == 'true' }} + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + run: | + npm run build-and-publish + working-directory: ./apps/js-sdk/firecrawl diff --git a/.github/workflows/publish-python-sdk.yml b/.github/workflows/publish-python-sdk.yml new file mode 100644 index 00000000..6d86f1e0 --- /dev/null +++ b/.github/workflows/publish-python-sdk.yml @@ -0,0 +1,47 @@ +name: Publish Python SDK + +on: [] + +env: + PYPI_USERNAME: ${{ secrets.PYPI_USERNAME }} + PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + +jobs: + build-and-publish: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.x' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install setuptools wheel twine build requests packaging + + - name: Run version check script + id: version_check_script + run: | + VERSION_INCREMENTED=$(python .github/scripts/check_version_has_incremented.py python ./apps/python-sdk/firecrawl firecrawl-py) + echo "VERSION_INCREMENTED=$VERSION_INCREMENTED" >> $GITHUB_ENV + + - name: Build the package + if: ${{ env.VERSION_INCREMENTED == 'true' }} + run: | + python -m build + working-directory: ./apps/python-sdk + + - name: Publish to PyPI + if: ${{ env.VERSION_INCREMENTED == 'true' }} + env: + TWINE_USERNAME: ${{ 
secrets.PYPI_USERNAME }} + TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + run: | + twine upload dist/* + working-directory: ./apps/python-sdk + diff --git a/.github/workflows/python-sdk.yml b/.github/workflows/python-sdk.yml index 1308cdef..27449888 100644 --- a/.github/workflows/python-sdk.yml +++ b/.github/workflows/python-sdk.yml @@ -1,9 +1,7 @@ name: Run Python SDK E2E Tests -on: - pull_request: - branches: - - main +on: [] + env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }} diff --git a/apps/api/src/__tests__/e2e_withAuth/index.test.ts b/apps/api/src/__tests__/e2e_withAuth/index.test.ts index f015acd3..f619254e 100644 --- a/apps/api/src/__tests__/e2e_withAuth/index.test.ts +++ b/apps/api/src/__tests__/e2e_withAuth/index.test.ts @@ -17,7 +17,7 @@ describe("E2E Tests for API Routes", () => { delete process.env.USE_DB_AUTHENTICATION; }); describe("GET /", () => { - it("should return Hello, world! message", async () => { + it.concurrent("should return Hello, world! message", async () => { const response = await request(TEST_URL).get("/"); expect(response.statusCode).toBe(200); @@ -26,7 +26,7 @@ describe("E2E Tests for API Routes", () => { }); describe("GET /test", () => { - it("should return Hello, world! message", async () => { + it.concurrent("should return Hello, world! 
message", async () => { const response = await request(TEST_URL).get("/test"); expect(response.statusCode).toBe(200); expect(response.text).toContain("Hello, world!"); @@ -34,12 +34,12 @@ describe("E2E Tests for API Routes", () => { }); describe("POST /v0/scrape", () => { - it("should require authorization", async () => { + it.concurrent("should require authorization", async () => { const response = await request(app).post("/v0/scrape"); expect(response.statusCode).toBe(401); }); - it("should return an error response with an invalid API key", async () => { + it.concurrent("should return an error response with an invalid API key", async () => { const response = await request(TEST_URL) .post("/v0/scrape") .set("Authorization", `Bearer invalid-api-key`) @@ -48,7 +48,7 @@ describe("E2E Tests for API Routes", () => { expect(response.statusCode).toBe(401); }); - it("should return an error for a blocklisted URL", async () => { + it.concurrent("should return an error for a blocklisted URL", async () => { const blocklistedUrl = "https://facebook.com/fake-test"; const response = await request(TEST_URL) .post("/v0/scrape") @@ -61,37 +61,38 @@ describe("E2E Tests for API Routes", () => { ); }); - it("should return a successful response with a valid preview token", async () => { - const response = await request(TEST_URL) - .post("/v0/scrape") - .set("Authorization", `Bearer this_is_just_a_preview_token`) - .set("Content-Type", "application/json") - .send({ url: "https://roastmywebsite.ai" }); - expect(response.statusCode).toBe(200); - }, 30000); // 30 seconds timeout + // tested on rate limit test + // it.concurrent("should return a successful response with a valid preview token", async () => { + // const response = await request(TEST_URL) + // .post("/v0/scrape") + // .set("Authorization", `Bearer this_is_just_a_preview_token`) + // .set("Content-Type", "application/json") + // .send({ url: "https://roastmywebsite.ai" }); + // expect(response.statusCode).toBe(200); + // }, 
30000); // 30 seconds timeout - it("should return a successful response with a valid API key", async () => { + it.concurrent("should return a successful response with a valid API key", async () => { const response = await request(TEST_URL) .post("/v0/scrape") .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) .set("Content-Type", "application/json") - .send({ url: "https://firecrawl.dev" }); + .send({ url: "https://roastmywebsite.ai" }); expect(response.statusCode).toBe(200); expect(response.body).toHaveProperty("data"); expect(response.body.data).toHaveProperty("content"); expect(response.body.data).toHaveProperty("markdown"); expect(response.body.data).toHaveProperty("metadata"); expect(response.body.data).not.toHaveProperty("html"); - expect(response.body.data.content).toContain("🔥 Firecrawl"); + expect(response.body.data.content).toContain("_Roast_"); }, 30000); // 30 seconds timeout - it("should return a successful response with a valid API key and includeHtml set to true", async () => { + it.concurrent("should return a successful response with a valid API key and includeHtml set to true", async () => { const response = await request(TEST_URL) .post("/v0/scrape") .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) .set("Content-Type", "application/json") .send({ - url: "https://firecrawl.dev", + url: "https://roastmywebsite.ai", pageOptions: { includeHtml: true }, }); expect(response.statusCode).toBe(200); @@ -100,12 +101,12 @@ describe("E2E Tests for API Routes", () => { expect(response.body.data).toHaveProperty("markdown"); expect(response.body.data).toHaveProperty("html"); expect(response.body.data).toHaveProperty("metadata"); - expect(response.body.data.content).toContain("🔥 Firecrawl"); - expect(response.body.data.markdown).toContain("🔥 Firecrawl"); + expect(response.body.data.content).toContain("_Roast_"); + expect(response.body.data.markdown).toContain("_Roast_"); expect(response.body.data.html).toContain(" { + it.concurrent('should 
return a successful response for a valid scrape with PDF file', async () => { const response = await request(TEST_URL) .post('/v0/scrape') .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`) @@ -120,7 +121,7 @@ describe("E2E Tests for API Routes", () => { expect(response.body.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy'); }, 60000); // 60 seconds - it('should return a successful response for a valid scrape with PDF file without explicit .pdf extension', async () => { + it.concurrent('should return a successful response for a valid scrape with PDF file without explicit .pdf extension', async () => { const response = await request(TEST_URL) .post('/v0/scrape') .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`) @@ -136,7 +137,7 @@ describe("E2E Tests for API Routes", () => { }, 60000); // 60 seconds // TODO: add this test back once we nail the waitFor option to be more deterministic - // it("should return a successful response with a valid API key and waitFor option", async () => { + // it.concurrent("should return a successful response with a valid API key and waitFor option", async () => { // const startTime = Date.now(); // const response = await request(TEST_URL) // .post("/v0/scrape") @@ -158,12 +159,12 @@ describe("E2E Tests for API Routes", () => { }); describe("POST /v0/crawl", () => { - it("should require authorization", async () => { + it.concurrent("should require authorization", async () => { const response = await request(TEST_URL).post("/v0/crawl"); expect(response.statusCode).toBe(401); }); - it("should return an error response with an invalid API key", async () => { + it.concurrent("should return an error response with an invalid API key", async () => { const response = await request(TEST_URL) .post("/v0/crawl") .set("Authorization", `Bearer invalid-api-key`) @@ -172,7 +173,7 @@ describe("E2E Tests for API Routes", () => { expect(response.statusCode).toBe(401); }); - it("should 
return an error for a blocklisted URL", async () => { + it.concurrent("should return an error for a blocklisted URL", async () => { const blocklistedUrl = "https://twitter.com/fake-test"; const response = await request(TEST_URL) .post("/v0/crawl") @@ -185,7 +186,7 @@ describe("E2E Tests for API Routes", () => { ); }); - it("should return a successful response with a valid API key for crawl", async () => { + it.concurrent("should return a successful response with a valid API key for crawl", async () => { const response = await request(TEST_URL) .post("/v0/crawl") .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) @@ -197,7 +198,7 @@ describe("E2E Tests for API Routes", () => { /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/ ); }); - it('should prevent duplicate requests using the same idempotency key', async () => { + it.concurrent('should prevent duplicate requests using the same idempotency key', async () => { const uniqueIdempotencyKey = uuidv4(); // First request with the idempotency key @@ -222,7 +223,7 @@ describe("E2E Tests for API Routes", () => { expect(secondResponse.body.error).toBe('Idempotency key already used'); }); - it("should return a successful response with a valid API key and valid includes option", async () => { + it.concurrent("should return a successful response with a valid API key and valid includes option", async () => { const crawlResponse = await request(TEST_URL) .post("/v0/crawl") .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) @@ -259,7 +260,6 @@ describe("E2E Tests for API Routes", () => { ); expect(urls.length).toBeGreaterThan(5); urls.forEach((url: string) => { - console.log({url}) expect(url.startsWith("https://www.mendable.ai/blog/")).toBeTruthy(); }); @@ -273,7 +273,7 @@ describe("E2E Tests for API Routes", () => { expect(completedResponse.body.data[0].content).toContain("Mendable"); }, 60000); // 60 seconds - it("should return a successful response with a valid 
API key and valid excludes option", async () => { + it.concurrent("should return a successful response with a valid API key and valid excludes option", async () => { const crawlResponse = await request(TEST_URL) .post("/v0/crawl") .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) @@ -314,7 +314,7 @@ describe("E2E Tests for API Routes", () => { }); }, 90000); // 90 seconds - it("should return a successful response with a valid API key and limit to 3", async () => { + it.concurrent("should return a successful response with a valid API key and limit to 3", async () => { const crawlResponse = await request(TEST_URL) .post("/v0/crawl") .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) @@ -354,7 +354,7 @@ describe("E2E Tests for API Routes", () => { expect(completedResponse.body.data[0].content).toContain("Mendable"); }, 60000); // 60 seconds - it("should return a successful response with max depth option for a valid crawl job", async () => { + it.concurrent("should return a successful response with max depth option for a valid crawl job", async () => { const crawlResponse = await request(TEST_URL) .post("/v0/crawl") .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) @@ -396,7 +396,7 @@ describe("E2E Tests for API Routes", () => { }); }, 120000); - // it("should return a successful response with a valid API key and valid limit option", async () => { + // it.concurrent("should return a successful response with a valid API key and valid limit option", async () => { // const crawlResponse = await request(TEST_URL) // .post("/v0/crawl") // .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) @@ -441,13 +441,13 @@ describe("E2E Tests for API Routes", () => { // expect(completedResponse.body.data[0].content).not.toContain("main menu"); // }, 60000); // 60 seconds - it("should return a successful response for a valid crawl job with includeHtml set to true option", async () => { + it.concurrent("should return a successful response for a valid 
crawl job with includeHtml set to true option", async () => { const crawlResponse = await request(TEST_URL) .post("/v0/crawl") .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) .set("Content-Type", "application/json") .send({ - url: "https://firecrawl.dev", + url: "https://roastmywebsite.ai", pageOptions: { includeHtml: true }, }); expect(crawlResponse.statusCode).toBe(200); @@ -486,19 +486,19 @@ describe("E2E Tests for API Routes", () => { // 120 seconds expect(completedResponse.body.data[0]).toHaveProperty("html"); expect(completedResponse.body.data[0]).toHaveProperty("metadata"); - expect(completedResponse.body.data[0].content).toContain("🔥 Firecrawl"); - expect(completedResponse.body.data[0].markdown).toContain("Firecrawl"); + expect(completedResponse.body.data[0].content).toContain("_Roast_"); + expect(completedResponse.body.data[0].markdown).toContain("_Roast_"); expect(completedResponse.body.data[0].html).toContain(" { - it("should require authorization", async () => { + it.concurrent("should require authorization", async () => { const response = await request(TEST_URL).post("/v0/crawlWebsitePreview"); expect(response.statusCode).toBe(401); }); - it("should return an error response with an invalid API key", async () => { + it.concurrent("should return an error response with an invalid API key", async () => { const response = await request(TEST_URL) .post("/v0/crawlWebsitePreview") .set("Authorization", `Bearer invalid-api-key`) @@ -507,7 +507,7 @@ describe("E2E Tests for API Routes", () => { expect(response.statusCode).toBe(401); }); - // it("should return an error for a blocklisted URL", async () => { + // it.concurrent("should return an error for a blocklisted URL", async () => { // const blocklistedUrl = "https://instagram.com/fake-test"; // const response = await request(TEST_URL) // .post("/v0/crawlWebsitePreview") @@ -519,7 +519,7 @@ describe("E2E Tests for API Routes", () => { // expect(response.body.error).toContain("Firecrawl currently 
does not support social media scraping due to policy restrictions. We're actively working on building support for it."); // }); - it("should return a timeout error when scraping takes longer than the specified timeout", async () => { + it.concurrent("should return a timeout error when scraping takes longer than the specified timeout", async () => { const response = await request(TEST_URL) .post("/v0/scrape") .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) @@ -529,27 +529,27 @@ describe("E2E Tests for API Routes", () => { expect(response.statusCode).toBe(408); }, 3000); - it("should return a successful response with a valid API key for crawlWebsitePreview", async () => { - const response = await request(TEST_URL) - .post("/v0/crawlWebsitePreview") - .set("Authorization", `Bearer this_is_just_a_preview_token`) - .set("Content-Type", "application/json") - .send({ url: "https://firecrawl.dev" }); - expect(response.statusCode).toBe(200); - expect(response.body).toHaveProperty("jobId"); - expect(response.body.jobId).toMatch( - /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/ - ); - }); + // it.concurrent("should return a successful response with a valid API key for crawlWebsitePreview", async () => { + // const response = await request(TEST_URL) + // .post("/v0/crawlWebsitePreview") + // .set("Authorization", `Bearer this_is_just_a_preview_token`) + // .set("Content-Type", "application/json") + // .send({ url: "https://firecrawl.dev" }); + // expect(response.statusCode).toBe(200); + // expect(response.body).toHaveProperty("jobId"); + // expect(response.body.jobId).toMatch( + // /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/ + // ); + // }); }); describe("POST /v0/search", () => { - it("should require authorization", async () => { + it.concurrent("should require authorization", async () => { const response = await request(TEST_URL).post("/v0/search"); 
expect(response.statusCode).toBe(401); }); - it("should return an error response with an invalid API key", async () => { + it.concurrent("should return an error response with an invalid API key", async () => { const response = await request(TEST_URL) .post("/v0/search") .set("Authorization", `Bearer invalid-api-key`) @@ -558,7 +558,7 @@ describe("E2E Tests for API Routes", () => { expect(response.statusCode).toBe(401); }); - it("should return a successful response with a valid API key for search", async () => { + it.concurrent("should return a successful response with a valid API key for search", async () => { const response = await request(TEST_URL) .post("/v0/search") .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) @@ -572,31 +572,31 @@ describe("E2E Tests for API Routes", () => { }); describe("GET /v0/crawl/status/:jobId", () => { - it("should require authorization", async () => { + it.concurrent("should require authorization", async () => { const response = await request(TEST_URL).get("/v0/crawl/status/123"); expect(response.statusCode).toBe(401); }); - it("should return an error response with an invalid API key", async () => { + it.concurrent("should return an error response with an invalid API key", async () => { const response = await request(TEST_URL) .get("/v0/crawl/status/123") .set("Authorization", `Bearer invalid-api-key`); expect(response.statusCode).toBe(401); }); - it("should return Job not found for invalid job ID", async () => { + it.concurrent("should return Job not found for invalid job ID", async () => { const response = await request(TEST_URL) .get("/v0/crawl/status/invalidJobId") .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`); expect(response.statusCode).toBe(404); }); - it("should return a successful crawl status response for a valid crawl job", async () => { + it.concurrent("should return a successful crawl status response for a valid crawl job", async () => { const crawlResponse = await request(TEST_URL) 
.post("/v0/crawl") .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) .set("Content-Type", "application/json") - .send({ url: "https://firecrawl.dev" }); + .send({ url: "https://roastmywebsite.ai" }); expect(crawlResponse.statusCode).toBe(200); let isCompleted = false; @@ -622,10 +622,10 @@ describe("E2E Tests for API Routes", () => { expect(completedResponse.body.data[0]).toHaveProperty("content"); expect(completedResponse.body.data[0]).toHaveProperty("markdown"); expect(completedResponse.body.data[0]).toHaveProperty("metadata"); - expect(completedResponse.body.data[0].content).toContain("🔥 Firecrawl"); - }, 60000); // 60 seconds + expect(completedResponse.body.data[0].content).toContain("_Roast_"); + }, 120000); // 120 seconds - it('should return a successful response for a valid crawl job with PDF files without explicit .pdf extension', async () => { + it.concurrent('should return a successful response for a valid crawl job with PDF files without explicit .pdf extension', async () => { const crawlResponse = await request(TEST_URL) .post('/v0/crawl') .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`) @@ -660,9 +660,9 @@ describe("E2E Tests for API Routes", () => { }) ]) ); - }, 60000); // 60 seconds + }, 120000); // 120 seconds - it("should return a successful response with max depth option for a valid crawl job", async () => { + it.concurrent("should return a successful response with max depth option for a valid crawl job", async () => { const crawlResponse = await request(TEST_URL) .post("/v0/crawl") .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) @@ -705,15 +705,15 @@ describe("E2E Tests for API Routes", () => { const depth = new URL(url).pathname.split("/").filter(Boolean).length; expect(depth).toBeLessThanOrEqual(1); }); - }, 120000); + }, 180000); - it("should return a successful response for a valid crawl job with includeHtml set to true option", async () => { + it.concurrent("should return a successful response for a valid 
crawl job with includeHtml set to true option", async () => { const crawlResponse = await request(TEST_URL) .post("/v0/crawl") .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) .set("Content-Type", "application/json") .send({ - url: "https://firecrawl.dev", + url: "https://roastmywebsite.ai", pageOptions: { includeHtml: true }, }); expect(crawlResponse.statusCode).toBe(200); @@ -725,12 +725,23 @@ describe("E2E Tests for API Routes", () => { expect(response.body).toHaveProperty("status"); expect(response.body.status).toBe("active"); - // wait for 30 seconds - await new Promise((r) => setTimeout(r, 30000)); + let isFinished = false; + let completedResponse; - const completedResponse = await request(TEST_URL) - .get(`/v0/crawl/status/${crawlResponse.body.jobId}`) - .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`); + while (!isFinished) { + const response = await request(TEST_URL) + .get(`/v0/crawl/status/${crawlResponse.body.jobId}`) + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("status"); + + if (response.body.status === "completed") { + isFinished = true; + completedResponse = response; + } else { + await new Promise((r) => setTimeout(r, 1000)); // Wait for 1 second before checking again + } + } expect(completedResponse.statusCode).toBe(200); expect(completedResponse.body).toHaveProperty("status"); @@ -739,17 +750,14 @@ describe("E2E Tests for API Routes", () => { expect(completedResponse.body.data[0]).toHaveProperty("content"); expect(completedResponse.body.data[0]).toHaveProperty("markdown"); expect(completedResponse.body.data[0]).toHaveProperty("metadata"); - - // 120 seconds expect(completedResponse.body.data[0]).toHaveProperty("html"); - expect(completedResponse.body.data[0]).toHaveProperty("metadata"); - expect(completedResponse.body.data[0].content).toContain("🔥 Firecrawl"); - 
expect(completedResponse.body.data[0].markdown).toContain("Firecrawl"); + expect(completedResponse.body.data[0].content).toContain("_Roast_"); + expect(completedResponse.body.data[0].markdown).toContain("_Roast_"); expect(completedResponse.body.data[0].html).toContain(" { + it.concurrent("If someone cancels a crawl job, it should turn into failed status", async () => { const crawlResponse = await request(TEST_URL) .post("/v0/crawl") .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) @@ -785,7 +793,7 @@ describe("E2E Tests for API Routes", () => { }, 60000); // 60 seconds describe("POST /v0/scrape with LLM Extraction", () => { - it("should extract data using LLM extraction mode", async () => { + it.concurrent("should extract data using LLM extraction mode", async () => { const response = await request(TEST_URL) .post("/v0/scrape") .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) @@ -836,7 +844,7 @@ describe("E2E Tests for API Routes", () => { }); // describe("POST /v0/scrape for Top 100 Companies", () => { - // it("should extract data for the top 100 companies", async () => { + // it.concurrent("should extract data for the top 100 companies", async () => { // const response = await request(TEST_URL) // .post("/v0/scrape") // .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) @@ -894,7 +902,7 @@ describe("E2E Tests for API Routes", () => { // }); describe("POST /v0/crawl with fast mode", () => { - it("should complete the crawl under 20 seconds", async () => { + it.concurrent("should complete the crawl under 20 seconds", async () => { const startTime = Date.now(); const crawlResponse = await request(TEST_URL) @@ -927,10 +935,10 @@ describe("E2E Tests for API Routes", () => { } } - const endTime = Date.now(); - const timeElapsed = (endTime - startTime) / 1000; // Convert to seconds + // const endTime = Date.now(); + // const timeElapsed = (endTime - startTime) / 1000; // Convert to seconds - console.log(`Time elapsed: ${timeElapsed} 
seconds`); + // console.log(`Time elapsed: ${timeElapsed} seconds`); expect(statusResponse.body.status).toBe("completed"); expect(statusResponse.body).toHaveProperty("data"); @@ -945,7 +953,7 @@ describe("E2E Tests for API Routes", () => { }, 20000); - // it("should complete the crawl in more than 10 seconds", async () => { + // it.concurrent("should complete the crawl in more than 10 seconds", async () => { // const startTime = Date.now(); // const crawlResponse = await request(TEST_URL) @@ -995,7 +1003,7 @@ describe("E2E Tests for API Routes", () => { }); describe("GET /is-production", () => { - it("should return the production status", async () => { + it.concurrent("should return the production status", async () => { const response = await request(TEST_URL).get("/is-production"); expect(response.statusCode).toBe(200); expect(response.body).toHaveProperty("isProduction"); @@ -1003,8 +1011,8 @@ describe("E2E Tests for API Routes", () => { }); describe("Rate Limiter", () => { - it("should return 429 when rate limit is exceeded for preview token", async () => { - for (let i = 0; i < 4; i++) { + it.concurrent("should return 429 when rate limit is exceeded for preview token", async () => { + for (let i = 0; i < 5; i++) { const response = await request(TEST_URL) .post("/v0/scrape") .set("Authorization", `Bearer this_is_just_a_preview_token`) @@ -1020,10 +1028,10 @@ describe("E2E Tests for API Routes", () => { .send({ url: "https://www.scrapethissite.com" }); expect(response.statusCode).toBe(429); - }, 60000); + }, 90000); }); - // it("should return 429 when rate limit is exceeded for API key", async () => { + // it.concurrent("should return 429 when rate limit is exceeded for API key", async () => { // for (let i = 0; i < parseInt(process.env.RATE_LIMIT_TEST_API_KEY_SCRAPE); i++) { // const response = await request(TEST_URL) // .post("/v0/scrape") @@ -1043,7 +1051,7 @@ describe("E2E Tests for API Routes", () => { // expect(response.statusCode).toBe(429); // }, 60000); 
- // it("should return 429 when rate limit is exceeded for API key", async () => { + // it.concurrent("should return 429 when rate limit is exceeded for API key", async () => { // for (let i = 0; i < parseInt(process.env.RATE_LIMIT_TEST_API_KEY_CRAWL); i++) { // const response = await request(TEST_URL) // .post("/v0/crawl") diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json index d3581b35..8f9e470f 100644 --- a/apps/js-sdk/firecrawl/package.json +++ b/apps/js-sdk/firecrawl/package.json @@ -1,13 +1,13 @@ { "name": "@mendable/firecrawl-js", - "version": "0.0.23", + "version": "0.0.25", "description": "JavaScript SDK for Firecrawl API", "main": "build/index.js", "types": "types/index.d.ts", "type": "module", "scripts": { "build": "tsc", - "publish": "npm run build && npm publish --access public", + "build-and-publish": "npm run build && npm publish --access public", "publish-beta": "npm run build && npm publish --access public --tag beta", "test": "jest src/__tests__/**/*.test.ts" }, diff --git a/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts b/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts index 16b7ab79..c7dde697 100644 --- a/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts +++ b/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts @@ -8,94 +8,94 @@ const TEST_API_KEY = process.env.TEST_API_KEY; const API_URL = process.env.API_URL; describe('FirecrawlApp E2E Tests', () => { - test('should throw error for no API key', () => { + test.concurrent('should throw error for no API key', () => { expect(() => { new FirecrawlApp({ apiKey: null, apiUrl: API_URL }); }).toThrow("No API key provided"); }); - test('should throw error for invalid API key on scrape', async () => { + test.concurrent('should throw error for invalid API key on scrape', async () => { const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL }); - await 
expect(invalidApp.scrapeUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401"); + await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401"); }); - test('should throw error for blocklisted URL on scrape', async () => { + test.concurrent('should throw error for blocklisted URL on scrape', async () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const blocklistedUrl = "https://facebook.com/fake-test"; await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403"); }); - test('should return successful response with valid preview token', async () => { + test.concurrent('should return successful response with valid preview token', async () => { const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL }); - const response = await app.scrapeUrl('https://firecrawl.dev'); + const response = await app.scrapeUrl('https://roastmywebsite.ai'); expect(response).not.toBeNull(); - expect(response.data.content).toContain("🔥 Firecrawl"); + expect(response.data.content).toContain("_Roast_"); }, 30000); // 30 seconds timeout - test('should return successful response for valid scrape', async () => { + test.concurrent('should return successful response for valid scrape', async () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); - const response = await app.scrapeUrl('https://firecrawl.dev'); + const response = await app.scrapeUrl('https://roastmywebsite.ai'); expect(response).not.toBeNull(); - expect(response.data.content).toContain("🔥 Firecrawl"); + expect(response.data.content).toContain("_Roast_"); expect(response.data).toHaveProperty('markdown'); expect(response.data).toHaveProperty('metadata'); expect(response.data).not.toHaveProperty('html'); }, 30000); // 30 seconds timeout - test('should return successful response with valid API key and include HTML', async () => { + 
test.concurrent('should return successful response with valid API key and include HTML', async () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); - const response = await app.scrapeUrl('https://firecrawl.dev', { pageOptions: { includeHtml: true } }); + const response = await app.scrapeUrl('https://roastmywebsite.ai', { pageOptions: { includeHtml: true } }); expect(response).not.toBeNull(); - expect(response.data.content).toContain("🔥 Firecrawl"); - expect(response.data.markdown).toContain("🔥 Firecrawl"); + expect(response.data.content).toContain("_Roast_"); + expect(response.data.markdown).toContain("_Roast_"); expect(response.data.html).toContain(" { + test.concurrent('should return successful response for valid scrape with PDF file', async () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf'); expect(response).not.toBeNull(); expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy'); }, 30000); // 30 seconds timeout - test('should return successful response for valid scrape with PDF file without explicit extension', async () => { + test.concurrent('should return successful response for valid scrape with PDF file without explicit extension', async () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001'); expect(response).not.toBeNull(); expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy'); }, 30000); // 30 seconds timeout - test('should throw error for invalid API key on crawl', async () => { + test.concurrent('should throw error for invalid API key on crawl', async () => { const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL }); - await 
expect(invalidApp.crawlUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401"); + await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401"); }); - test('should throw error for blocklisted URL on crawl', async () => { + test.concurrent('should throw error for blocklisted URL on crawl', async () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const blocklistedUrl = "https://twitter.com/fake-test"; await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403"); }); - test('should return successful response for crawl and wait for completion', async () => { + test.concurrent('should return successful response for crawl and wait for completion', async () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); - const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 30); + const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, true, 30); expect(response).not.toBeNull(); - expect(response[0].content).toContain("🔥 Firecrawl"); + expect(response[0].content).toContain("_Roast_"); }, 60000); // 60 seconds timeout - test('should handle idempotency key for crawl', async () => { + test.concurrent('should handle idempotency key for crawl', async () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const uniqueIdempotencyKey = uuidv4(); - const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, false, 2, uniqueIdempotencyKey); + const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, false, 2, uniqueIdempotencyKey); expect(response).not.toBeNull(); expect(response.jobId).toBeDefined(); - await expect(app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: 
['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409"); + await expect(app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409"); }); - test('should check crawl status', async () => { + test.concurrent('should check crawl status', async () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); - const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, false); + const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, false); expect(response).not.toBeNull(); expect(response.jobId).toBeDefined(); @@ -115,7 +115,7 @@ describe('FirecrawlApp E2E Tests', () => { expect(statusResponse.data.length).toBeGreaterThan(0); }, 35000); // 35 seconds timeout - test('should return successful response for search', async () => { + test.concurrent('should return successful response for search', async () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const response = await app.search("test query"); expect(response).not.toBeNull(); @@ -123,12 +123,12 @@ describe('FirecrawlApp E2E Tests', () => { expect(response.data.length).toBeGreaterThan(2); }, 30000); // 30 seconds timeout - test('should throw error for invalid API key on search', async () => { + test.concurrent('should throw error for invalid API key on search', async () => { const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL }); await expect(invalidApp.search("test query")).rejects.toThrow("Request failed with status code 401"); }); - test('should perform LLM extraction', async () => { + test.concurrent('should perform LLM extraction', async () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const response = await app.scrapeUrl("https://mendable.ai", { 
extractorOptions: { diff --git a/apps/python-sdk/firecrawl/__init__.py b/apps/python-sdk/firecrawl/__init__.py index 68999159..b82039e6 100644 --- a/apps/python-sdk/firecrawl/__init__.py +++ b/apps/python-sdk/firecrawl/__init__.py @@ -1,3 +1,3 @@ from .firecrawl import FirecrawlApp -__version__ = "0.0.11" +__version__ = "0.0.13" diff --git a/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py index 86ce1f9f..90a6498c 100644 --- a/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py +++ b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py @@ -38,31 +38,31 @@ def test_blocklisted_url(): def test_successful_response_with_valid_preview_token(): app = FirecrawlApp(api_url=API_URL, api_key="this_is_just_a_preview_token") - response = app.scrape_url('https://firecrawl.dev') + response = app.scrape_url('https://roastmywebsite.ai') assert response is not None assert 'content' in response - assert "🔥 Firecrawl" in response['content'] + assert "_Roast_" in response['content'] def test_scrape_url_e2e(): app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) - response = app.scrape_url('https://firecrawl.dev') + response = app.scrape_url('https://roastmywebsite.ai') assert response is not None assert 'content' in response assert 'markdown' in response assert 'metadata' in response assert 'html' not in response - assert "🔥 Firecrawl" in response['content'] + assert "_Roast_" in response['content'] def test_successful_response_with_valid_api_key_and_include_html(): app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) - response = app.scrape_url('https://firecrawl.dev', {'pageOptions': {'includeHtml': True}}) + response = app.scrape_url('https://roastmywebsite.ai', {'pageOptions': {'includeHtml': True}}) assert response is not None assert 'content' in response assert 'markdown' in response assert 'html' in response assert 'metadata' in response - assert "🔥 Firecrawl" in response['content'] - assert 
"🔥 Firecrawl" in response['markdown'] + assert "_Roast_" in response['content'] + assert "_Roast_" in response['markdown'] assert " 0 assert 'content' in response[0] - assert "🔥 Firecrawl" in response[0]['content'] + assert "_Roast_" in response[0]['content'] def test_crawl_url_with_idempotency_key_e2e(): app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) uniqueIdempotencyKey = str(uuid4()) - response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey) + response = app.crawl_url('https://roastmywebsite.ai', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey) assert response is not None assert len(response) > 0 assert 'content' in response[0] - assert "🔥 Firecrawl" in response[0]['content'] + assert "_Roast_" in response[0]['content'] with pytest.raises(Exception) as excinfo: app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey)