mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 11:42:24 +08:00
Merge pull request #246 from mendableai/194-sdk-ci-pipeline-for-publishing-pythonnode-sdk
[Feat] CI/CD for publishing js and python SDKs
This commit is contained in:
commit
4c3bfe4eb5
16
.github/scripts/check_version_has_incremented.py
vendored
16
.github/scripts/check_version_has_incremented.py
vendored
|
@ -1,24 +1,14 @@
|
||||||
"""
|
"""
|
||||||
checks local verions against published verions.
|
checks local versions against published versions.
|
||||||
|
|
||||||
# Usage:
|
# Usage:
|
||||||
|
|
||||||
Unix:
|
|
||||||
python .github/scripts/check_version_has_incremented.py js ./apps/js-sdk/firecrawl @mendable/firecrawl-js
|
python .github/scripts/check_version_has_incremented.py js ./apps/js-sdk/firecrawl @mendable/firecrawl-js
|
||||||
|
|
||||||
Windows:
|
|
||||||
python .github\scripts\check_version_has_incremented.py js .\apps\js-sdk\firecrawl @mendable/firecrawl-js
|
|
||||||
|
|
||||||
Local version: 0.0.22
|
Local version: 0.0.22
|
||||||
Published version: 0.0.21
|
Published version: 0.0.21
|
||||||
true
|
true
|
||||||
|
|
||||||
Unix:
|
|
||||||
python .github/scripts/check_version_has_incremented.py python ./apps/python-sdk/firecrawl firecrawl-py
|
python .github/scripts/check_version_has_incremented.py python ./apps/python-sdk/firecrawl firecrawl-py
|
||||||
|
|
||||||
Windows:
|
|
||||||
python .github\scripts\check_version_has_incremented.py python .\apps\python-sdk\firecrawl firecrawl-py
|
|
||||||
|
|
||||||
Local version: 0.0.11
|
Local version: 0.0.11
|
||||||
Published version: 0.0.11
|
Published version: 0.0.11
|
||||||
false
|
false
|
||||||
|
@ -88,8 +78,8 @@ if __name__ == "__main__":
|
||||||
raise ValueError("Invalid package type. Use 'python' or 'js'.")
|
raise ValueError("Invalid package type. Use 'python' or 'js'.")
|
||||||
|
|
||||||
# Print versions for debugging
|
# Print versions for debugging
|
||||||
print(f"Local version: {current_version}")
|
# print(f"Local version: {current_version}")
|
||||||
print(f"Published version: {published_version}")
|
# print(f"Published version: {published_version}")
|
||||||
|
|
||||||
# Compare versions and print result
|
# Compare versions and print result
|
||||||
if is_version_incremented(current_version, published_version):
|
if is_version_incremented(current_version, published_version):
|
||||||
|
|
127
.github/workflows/fly.yml
vendored
127
.github/workflows/fly.yml
vendored
|
@ -3,8 +3,6 @@ on:
|
||||||
push:
|
push:
|
||||||
branches:
|
branches:
|
||||||
- main
|
- main
|
||||||
schedule:
|
|
||||||
- cron: '0 */2 * * *'
|
|
||||||
|
|
||||||
env:
|
env:
|
||||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||||
|
@ -25,9 +23,12 @@ env:
|
||||||
SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }}
|
SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }}
|
||||||
SUPABASE_URL: ${{ secrets.SUPABASE_URL }}
|
SUPABASE_URL: ${{ secrets.SUPABASE_URL }}
|
||||||
TEST_API_KEY: ${{ secrets.TEST_API_KEY }}
|
TEST_API_KEY: ${{ secrets.TEST_API_KEY }}
|
||||||
|
PYPI_USERNAME: ${{ secrets.PYPI_USERNAME }}
|
||||||
|
PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
|
||||||
|
NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
pre-deploy:
|
pre-deploy-e2e-tests:
|
||||||
name: Pre-deploy checks
|
name: Pre-deploy checks
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
services:
|
services:
|
||||||
|
@ -61,7 +62,7 @@ jobs:
|
||||||
|
|
||||||
pre-deploy-test-suite:
|
pre-deploy-test-suite:
|
||||||
name: Test Suite
|
name: Test Suite
|
||||||
needs: pre-deploy
|
needs: pre-deploy-e2e-tests
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
services:
|
services:
|
||||||
redis:
|
redis:
|
||||||
|
@ -94,19 +95,37 @@ jobs:
|
||||||
run: |
|
run: |
|
||||||
npm run test
|
npm run test
|
||||||
working-directory: ./apps/test-suite
|
working-directory: ./apps/test-suite
|
||||||
- name: Set up Python ${{ matrix.python-version }}
|
|
||||||
uses: actions/setup-python@v4
|
python-sdk-tests:
|
||||||
with:
|
name: Python SDK Tests
|
||||||
python-version: ${{ matrix.python-version }}
|
needs: pre-deploy-e2e-tests
|
||||||
- name: Install Python dependencies
|
runs-on: ubuntu-latest
|
||||||
run: |
|
steps:
|
||||||
python -m pip install --upgrade pip
|
- uses: actions/checkout@v3
|
||||||
|
- name: Set up Python
|
||||||
|
uses: actions/setup-python@v4
|
||||||
|
with:
|
||||||
|
python-version: '3.x'
|
||||||
|
- name: Install Python dependencies
|
||||||
|
run: |
|
||||||
|
python -m pip install --upgrade pip
|
||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
working-directory: ./apps/python-sdk
|
working-directory: ./apps/python-sdk
|
||||||
- name: Run E2E tests for Python SDK
|
- name: Run E2E tests for Python SDK
|
||||||
run: |
|
run: |
|
||||||
pytest firecrawl/__tests__/e2e_withAuth/test.py
|
pytest firecrawl/__tests__/e2e_withAuth/test.py
|
||||||
working-directory: ./apps/python-sdk
|
working-directory: ./apps/python-sdk
|
||||||
|
|
||||||
|
js-sdk-tests:
|
||||||
|
name: JavaScript SDK Tests
|
||||||
|
needs: pre-deploy-e2e-tests
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
- name: Set up Node.js
|
||||||
|
uses: actions/setup-node@v3
|
||||||
|
with:
|
||||||
|
node-version: "20"
|
||||||
- name: Install dependencies for JavaScript SDK
|
- name: Install dependencies for JavaScript SDK
|
||||||
run: pnpm install
|
run: pnpm install
|
||||||
working-directory: ./apps/js-sdk/firecrawl
|
working-directory: ./apps/js-sdk/firecrawl
|
||||||
|
@ -117,7 +136,7 @@ jobs:
|
||||||
deploy:
|
deploy:
|
||||||
name: Deploy app
|
name: Deploy app
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
needs: pre-deploy-test-suite
|
needs: [pre-deploy-test-suite, python-sdk-tests, js-sdk-tests]
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v3
|
||||||
- name: Change directory
|
- name: Change directory
|
||||||
|
@ -126,3 +145,83 @@ jobs:
|
||||||
- run: flyctl deploy ./apps/api --remote-only -a firecrawl-scraper-js
|
- run: flyctl deploy ./apps/api --remote-only -a firecrawl-scraper-js
|
||||||
env:
|
env:
|
||||||
FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
|
FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
|
||||||
|
|
||||||
|
build-and-publish-python-sdk:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
needs: deploy
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Checkout repository
|
||||||
|
uses: actions/checkout@v3
|
||||||
|
|
||||||
|
- name: Set up Python
|
||||||
|
uses: actions/setup-python@v4
|
||||||
|
with:
|
||||||
|
python-version: '3.x'
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
pip install setuptools wheel twine build requests packaging
|
||||||
|
|
||||||
|
- name: Run version check script
|
||||||
|
id: version_check_script
|
||||||
|
run: |
|
||||||
|
PYTHON_SDK_VERSION_INCREMENTED=$(python .github/scripts/check_version_has_incremented.py python ./apps/python-sdk/firecrawl firecrawl-py)
|
||||||
|
echo "PYTHON_SDK_VERSION_INCREMENTED=$PYTHON_SDK_VERSION_INCREMENTED" >> $GITHUB_ENV
|
||||||
|
|
||||||
|
- name: Build the package
|
||||||
|
if: ${{ env.PYTHON_SDK_VERSION_INCREMENTED == 'true' }}
|
||||||
|
run: |
|
||||||
|
python -m build
|
||||||
|
working-directory: ./apps/python-sdk
|
||||||
|
|
||||||
|
- name: Publish to PyPI
  # Gate on the same variable that this job's "Run version check script"
  # step appends to $GITHUB_ENV and that "Build the package" already checks
  # (PYTHON_SDK_VERSION_INCREMENTED). The previous condition read
  # env.VERSION_INCREMENTED, which no visible step in this job writes, so the
  # comparison with 'true' could not succeed and the upload was skipped even
  # when the package had just been built.
  if: ${{ env.PYTHON_SDK_VERSION_INCREMENTED == 'true' }}
  env:
    # twine reads its credentials from these two environment variables.
    TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
    TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
  run: |
    twine upload dist/*
  # dist/ is produced by `python -m build`, run from this same directory.
  working-directory: ./apps/python-sdk
|
||||||
|
|
||||||
|
build-and-publish-js-sdk:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
needs: deploy
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
- name: Set up Node.js
|
||||||
|
uses: actions/setup-node@v3
|
||||||
|
with:
|
||||||
|
node-version: '20'
|
||||||
|
registry-url: 'https://registry.npmjs.org/'
|
||||||
|
scope: '@mendable'
|
||||||
|
always-auth: true
|
||||||
|
|
||||||
|
- name: Install pnpm
|
||||||
|
run: npm install -g pnpm
|
||||||
|
|
||||||
|
- name: Install python for running version check script
|
||||||
|
run: |
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
pip install setuptools wheel requests packaging
|
||||||
|
|
||||||
|
- name: Install dependencies for JavaScript SDK
|
||||||
|
run: pnpm install
|
||||||
|
working-directory: ./apps/js-sdk/firecrawl
|
||||||
|
|
||||||
|
- name: Run version check script
|
||||||
|
id: version_check_script
|
||||||
|
run: |
|
||||||
|
VERSION_INCREMENTED=$(python .github/scripts/check_version_has_incremented.py js ./apps/js-sdk/firecrawl @mendable/firecrawl-js)
|
||||||
|
echo "VERSION_INCREMENTED=$VERSION_INCREMENTED" >> $GITHUB_ENV
|
||||||
|
|
||||||
|
- name: Build and publish to npm
|
||||||
|
if: ${{ env.VERSION_INCREMENTED == 'true' }}
|
||||||
|
env:
|
||||||
|
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
|
||||||
|
run: |
|
||||||
|
npm run build-and-publish
|
||||||
|
working-directory: ./apps/js-sdk/firecrawl
|
||||||
|
|
6
.github/workflows/js-sdk.yml
vendored
6
.github/workflows/js-sdk.yml
vendored
|
@ -1,9 +1,7 @@
|
||||||
name: Run JavaScript SDK E2E Tests
|
name: Run JavaScript SDK E2E Tests
|
||||||
|
|
||||||
on:
|
on: []
|
||||||
pull_request:
|
|
||||||
branches:
|
|
||||||
- main
|
|
||||||
env:
|
env:
|
||||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||||
BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }}
|
BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }}
|
||||||
|
|
46
.github/workflows/publish-js-sdk.yml
vendored
Normal file
46
.github/workflows/publish-js-sdk.yml
vendored
Normal file
|
@ -0,0 +1,46 @@
|
||||||
|
name: Publish JavaScript SDK
|
||||||
|
|
||||||
|
on: []
|
||||||
|
|
||||||
|
env:
|
||||||
|
NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build-and-publish:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
- name: Set up Node.js
|
||||||
|
uses: actions/setup-node@v3
|
||||||
|
with:
|
||||||
|
node-version: '20'
|
||||||
|
registry-url: 'https://registry.npmjs.org/'
|
||||||
|
scope: '@mendable'
|
||||||
|
always-auth: true
|
||||||
|
|
||||||
|
- name: Install pnpm
|
||||||
|
run: npm install -g pnpm
|
||||||
|
|
||||||
|
- name: Install python for running version check script
|
||||||
|
run: |
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
pip install setuptools wheel requests packaging
|
||||||
|
|
||||||
|
- name: Install dependencies for JavaScript SDK
|
||||||
|
run: pnpm install
|
||||||
|
working-directory: ./apps/js-sdk/firecrawl
|
||||||
|
|
||||||
|
- name: Run version check script
|
||||||
|
id: version_check_script
|
||||||
|
run: |
|
||||||
|
VERSION_INCREMENTED=$(python .github/scripts/check_version_has_incremented.py js ./apps/js-sdk/firecrawl @mendable/firecrawl-js)
|
||||||
|
echo "VERSION_INCREMENTED=$VERSION_INCREMENTED" >> $GITHUB_ENV
|
||||||
|
|
||||||
|
- name: Build and publish to npm
|
||||||
|
if: ${{ env.VERSION_INCREMENTED == 'true' }}
|
||||||
|
env:
|
||||||
|
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
|
||||||
|
run: |
|
||||||
|
npm run build-and-publish
|
||||||
|
working-directory: ./apps/js-sdk/firecrawl
|
47
.github/workflows/publish-python-sdk.yml
vendored
Normal file
47
.github/workflows/publish-python-sdk.yml
vendored
Normal file
|
@ -0,0 +1,47 @@
|
||||||
|
name: Publish Python SDK
|
||||||
|
|
||||||
|
on: []
|
||||||
|
|
||||||
|
env:
|
||||||
|
PYPI_USERNAME: ${{ secrets.PYPI_USERNAME }}
|
||||||
|
PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build-and-publish:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Checkout repository
|
||||||
|
uses: actions/checkout@v3
|
||||||
|
|
||||||
|
- name: Set up Python
|
||||||
|
uses: actions/setup-python@v4
|
||||||
|
with:
|
||||||
|
python-version: '3.x'
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
pip install setuptools wheel twine build requests packaging
|
||||||
|
|
||||||
|
- name: Run version check script
|
||||||
|
id: version_check_script
|
||||||
|
run: |
|
||||||
|
VERSION_INCREMENTED=$(python .github/scripts/check_version_has_incremented.py python ./apps/python-sdk/firecrawl firecrawl-py)
|
||||||
|
echo "VERSION_INCREMENTED=$VERSION_INCREMENTED" >> $GITHUB_ENV
|
||||||
|
|
||||||
|
- name: Build the package
|
||||||
|
if: ${{ env.VERSION_INCREMENTED == 'true' }}
|
||||||
|
run: |
|
||||||
|
python -m build
|
||||||
|
working-directory: ./apps/python-sdk
|
||||||
|
|
||||||
|
- name: Publish to PyPI
|
||||||
|
if: ${{ env.VERSION_INCREMENTED == 'true' }}
|
||||||
|
env:
|
||||||
|
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
|
||||||
|
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
|
||||||
|
run: |
|
||||||
|
twine upload dist/*
|
||||||
|
working-directory: ./apps/python-sdk
|
||||||
|
|
6
.github/workflows/python-sdk.yml
vendored
6
.github/workflows/python-sdk.yml
vendored
|
@ -1,9 +1,7 @@
|
||||||
name: Run Python SDK E2E Tests
|
name: Run Python SDK E2E Tests
|
||||||
|
|
||||||
on:
|
on: []
|
||||||
pull_request:
|
|
||||||
branches:
|
|
||||||
- main
|
|
||||||
env:
|
env:
|
||||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||||
BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }}
|
BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }}
|
||||||
|
|
|
@ -17,7 +17,7 @@ describe("E2E Tests for API Routes", () => {
|
||||||
delete process.env.USE_DB_AUTHENTICATION;
|
delete process.env.USE_DB_AUTHENTICATION;
|
||||||
});
|
});
|
||||||
describe("GET /", () => {
|
describe("GET /", () => {
|
||||||
it("should return Hello, world! message", async () => {
|
it.concurrent("should return Hello, world! message", async () => {
|
||||||
const response = await request(TEST_URL).get("/");
|
const response = await request(TEST_URL).get("/");
|
||||||
|
|
||||||
expect(response.statusCode).toBe(200);
|
expect(response.statusCode).toBe(200);
|
||||||
|
@ -26,7 +26,7 @@ describe("E2E Tests for API Routes", () => {
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("GET /test", () => {
|
describe("GET /test", () => {
|
||||||
it("should return Hello, world! message", async () => {
|
it.concurrent("should return Hello, world! message", async () => {
|
||||||
const response = await request(TEST_URL).get("/test");
|
const response = await request(TEST_URL).get("/test");
|
||||||
expect(response.statusCode).toBe(200);
|
expect(response.statusCode).toBe(200);
|
||||||
expect(response.text).toContain("Hello, world!");
|
expect(response.text).toContain("Hello, world!");
|
||||||
|
@ -34,12 +34,12 @@ describe("E2E Tests for API Routes", () => {
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("POST /v0/scrape", () => {
|
describe("POST /v0/scrape", () => {
|
||||||
it("should require authorization", async () => {
|
it.concurrent("should require authorization", async () => {
|
||||||
const response = await request(app).post("/v0/scrape");
|
const response = await request(app).post("/v0/scrape");
|
||||||
expect(response.statusCode).toBe(401);
|
expect(response.statusCode).toBe(401);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("should return an error response with an invalid API key", async () => {
|
it.concurrent("should return an error response with an invalid API key", async () => {
|
||||||
const response = await request(TEST_URL)
|
const response = await request(TEST_URL)
|
||||||
.post("/v0/scrape")
|
.post("/v0/scrape")
|
||||||
.set("Authorization", `Bearer invalid-api-key`)
|
.set("Authorization", `Bearer invalid-api-key`)
|
||||||
|
@ -48,7 +48,7 @@ describe("E2E Tests for API Routes", () => {
|
||||||
expect(response.statusCode).toBe(401);
|
expect(response.statusCode).toBe(401);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("should return an error for a blocklisted URL", async () => {
|
it.concurrent("should return an error for a blocklisted URL", async () => {
|
||||||
const blocklistedUrl = "https://facebook.com/fake-test";
|
const blocklistedUrl = "https://facebook.com/fake-test";
|
||||||
const response = await request(TEST_URL)
|
const response = await request(TEST_URL)
|
||||||
.post("/v0/scrape")
|
.post("/v0/scrape")
|
||||||
|
@ -61,37 +61,38 @@ describe("E2E Tests for API Routes", () => {
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("should return a successful response with a valid preview token", async () => {
|
// tested on rate limit test
|
||||||
const response = await request(TEST_URL)
|
// it.concurrent("should return a successful response with a valid preview token", async () => {
|
||||||
.post("/v0/scrape")
|
// const response = await request(TEST_URL)
|
||||||
.set("Authorization", `Bearer this_is_just_a_preview_token`)
|
// .post("/v0/scrape")
|
||||||
.set("Content-Type", "application/json")
|
// .set("Authorization", `Bearer this_is_just_a_preview_token`)
|
||||||
.send({ url: "https://roastmywebsite.ai" });
|
// .set("Content-Type", "application/json")
|
||||||
expect(response.statusCode).toBe(200);
|
// .send({ url: "https://roastmywebsite.ai" });
|
||||||
}, 30000); // 30 seconds timeout
|
// expect(response.statusCode).toBe(200);
|
||||||
|
// }, 30000); // 30 seconds timeout
|
||||||
|
|
||||||
it("should return a successful response with a valid API key", async () => {
|
it.concurrent("should return a successful response with a valid API key", async () => {
|
||||||
const response = await request(TEST_URL)
|
const response = await request(TEST_URL)
|
||||||
.post("/v0/scrape")
|
.post("/v0/scrape")
|
||||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||||
.set("Content-Type", "application/json")
|
.set("Content-Type", "application/json")
|
||||||
.send({ url: "https://firecrawl.dev" });
|
.send({ url: "https://roastmywebsite.ai" });
|
||||||
expect(response.statusCode).toBe(200);
|
expect(response.statusCode).toBe(200);
|
||||||
expect(response.body).toHaveProperty("data");
|
expect(response.body).toHaveProperty("data");
|
||||||
expect(response.body.data).toHaveProperty("content");
|
expect(response.body.data).toHaveProperty("content");
|
||||||
expect(response.body.data).toHaveProperty("markdown");
|
expect(response.body.data).toHaveProperty("markdown");
|
||||||
expect(response.body.data).toHaveProperty("metadata");
|
expect(response.body.data).toHaveProperty("metadata");
|
||||||
expect(response.body.data).not.toHaveProperty("html");
|
expect(response.body.data).not.toHaveProperty("html");
|
||||||
expect(response.body.data.content).toContain("🔥 Firecrawl");
|
expect(response.body.data.content).toContain("_Roast_");
|
||||||
}, 30000); // 30 seconds timeout
|
}, 30000); // 30 seconds timeout
|
||||||
|
|
||||||
it("should return a successful response with a valid API key and includeHtml set to true", async () => {
|
it.concurrent("should return a successful response with a valid API key and includeHtml set to true", async () => {
|
||||||
const response = await request(TEST_URL)
|
const response = await request(TEST_URL)
|
||||||
.post("/v0/scrape")
|
.post("/v0/scrape")
|
||||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||||
.set("Content-Type", "application/json")
|
.set("Content-Type", "application/json")
|
||||||
.send({
|
.send({
|
||||||
url: "https://firecrawl.dev",
|
url: "https://roastmywebsite.ai",
|
||||||
pageOptions: { includeHtml: true },
|
pageOptions: { includeHtml: true },
|
||||||
});
|
});
|
||||||
expect(response.statusCode).toBe(200);
|
expect(response.statusCode).toBe(200);
|
||||||
|
@ -100,12 +101,12 @@ describe("E2E Tests for API Routes", () => {
|
||||||
expect(response.body.data).toHaveProperty("markdown");
|
expect(response.body.data).toHaveProperty("markdown");
|
||||||
expect(response.body.data).toHaveProperty("html");
|
expect(response.body.data).toHaveProperty("html");
|
||||||
expect(response.body.data).toHaveProperty("metadata");
|
expect(response.body.data).toHaveProperty("metadata");
|
||||||
expect(response.body.data.content).toContain("🔥 Firecrawl");
|
expect(response.body.data.content).toContain("_Roast_");
|
||||||
expect(response.body.data.markdown).toContain("🔥 Firecrawl");
|
expect(response.body.data.markdown).toContain("_Roast_");
|
||||||
expect(response.body.data.html).toContain("<h1");
|
expect(response.body.data.html).toContain("<h1");
|
||||||
}, 30000); // 30 seconds timeout
|
}, 30000); // 30 seconds timeout
|
||||||
|
|
||||||
it('should return a successful response for a valid scrape with PDF file', async () => {
|
it.concurrent('should return a successful response for a valid scrape with PDF file', async () => {
|
||||||
const response = await request(TEST_URL)
|
const response = await request(TEST_URL)
|
||||||
.post('/v0/scrape')
|
.post('/v0/scrape')
|
||||||
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
|
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
|
||||||
|
@ -120,7 +121,7 @@ describe("E2E Tests for API Routes", () => {
|
||||||
expect(response.body.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
expect(response.body.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
||||||
}, 60000); // 60 seconds
|
}, 60000); // 60 seconds
|
||||||
|
|
||||||
it('should return a successful response for a valid scrape with PDF file without explicit .pdf extension', async () => {
|
it.concurrent('should return a successful response for a valid scrape with PDF file without explicit .pdf extension', async () => {
|
||||||
const response = await request(TEST_URL)
|
const response = await request(TEST_URL)
|
||||||
.post('/v0/scrape')
|
.post('/v0/scrape')
|
||||||
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
|
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
|
||||||
|
@ -136,7 +137,7 @@ describe("E2E Tests for API Routes", () => {
|
||||||
}, 60000); // 60 seconds
|
}, 60000); // 60 seconds
|
||||||
|
|
||||||
// TODO: add this test back once we nail the waitFor option to be more deterministic
|
// TODO: add this test back once we nail the waitFor option to be more deterministic
|
||||||
// it("should return a successful response with a valid API key and waitFor option", async () => {
|
// it.concurrent("should return a successful response with a valid API key and waitFor option", async () => {
|
||||||
// const startTime = Date.now();
|
// const startTime = Date.now();
|
||||||
// const response = await request(TEST_URL)
|
// const response = await request(TEST_URL)
|
||||||
// .post("/v0/scrape")
|
// .post("/v0/scrape")
|
||||||
|
@ -158,12 +159,12 @@ describe("E2E Tests for API Routes", () => {
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("POST /v0/crawl", () => {
|
describe("POST /v0/crawl", () => {
|
||||||
it("should require authorization", async () => {
|
it.concurrent("should require authorization", async () => {
|
||||||
const response = await request(TEST_URL).post("/v0/crawl");
|
const response = await request(TEST_URL).post("/v0/crawl");
|
||||||
expect(response.statusCode).toBe(401);
|
expect(response.statusCode).toBe(401);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("should return an error response with an invalid API key", async () => {
|
it.concurrent("should return an error response with an invalid API key", async () => {
|
||||||
const response = await request(TEST_URL)
|
const response = await request(TEST_URL)
|
||||||
.post("/v0/crawl")
|
.post("/v0/crawl")
|
||||||
.set("Authorization", `Bearer invalid-api-key`)
|
.set("Authorization", `Bearer invalid-api-key`)
|
||||||
|
@ -172,7 +173,7 @@ describe("E2E Tests for API Routes", () => {
|
||||||
expect(response.statusCode).toBe(401);
|
expect(response.statusCode).toBe(401);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("should return an error for a blocklisted URL", async () => {
|
it.concurrent("should return an error for a blocklisted URL", async () => {
|
||||||
const blocklistedUrl = "https://twitter.com/fake-test";
|
const blocklistedUrl = "https://twitter.com/fake-test";
|
||||||
const response = await request(TEST_URL)
|
const response = await request(TEST_URL)
|
||||||
.post("/v0/crawl")
|
.post("/v0/crawl")
|
||||||
|
@ -185,7 +186,7 @@ describe("E2E Tests for API Routes", () => {
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("should return a successful response with a valid API key for crawl", async () => {
|
it.concurrent("should return a successful response with a valid API key for crawl", async () => {
|
||||||
const response = await request(TEST_URL)
|
const response = await request(TEST_URL)
|
||||||
.post("/v0/crawl")
|
.post("/v0/crawl")
|
||||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||||
|
@ -197,7 +198,7 @@ describe("E2E Tests for API Routes", () => {
|
||||||
/^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
|
/^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
it('should prevent duplicate requests using the same idempotency key', async () => {
|
it.concurrent('should prevent duplicate requests using the same idempotency key', async () => {
|
||||||
const uniqueIdempotencyKey = uuidv4();
|
const uniqueIdempotencyKey = uuidv4();
|
||||||
|
|
||||||
// First request with the idempotency key
|
// First request with the idempotency key
|
||||||
|
@ -222,7 +223,7 @@ describe("E2E Tests for API Routes", () => {
|
||||||
expect(secondResponse.body.error).toBe('Idempotency key already used');
|
expect(secondResponse.body.error).toBe('Idempotency key already used');
|
||||||
});
|
});
|
||||||
|
|
||||||
it("should return a successful response with a valid API key and valid includes option", async () => {
|
it.concurrent("should return a successful response with a valid API key and valid includes option", async () => {
|
||||||
const crawlResponse = await request(TEST_URL)
|
const crawlResponse = await request(TEST_URL)
|
||||||
.post("/v0/crawl")
|
.post("/v0/crawl")
|
||||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||||
|
@ -259,7 +260,6 @@ describe("E2E Tests for API Routes", () => {
|
||||||
);
|
);
|
||||||
expect(urls.length).toBeGreaterThan(5);
|
expect(urls.length).toBeGreaterThan(5);
|
||||||
urls.forEach((url: string) => {
|
urls.forEach((url: string) => {
|
||||||
console.log({url})
|
|
||||||
expect(url.startsWith("https://www.mendable.ai/blog/")).toBeTruthy();
|
expect(url.startsWith("https://www.mendable.ai/blog/")).toBeTruthy();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -273,7 +273,7 @@ describe("E2E Tests for API Routes", () => {
|
||||||
expect(completedResponse.body.data[0].content).toContain("Mendable");
|
expect(completedResponse.body.data[0].content).toContain("Mendable");
|
||||||
}, 60000); // 60 seconds
|
}, 60000); // 60 seconds
|
||||||
|
|
||||||
it("should return a successful response with a valid API key and valid excludes option", async () => {
|
it.concurrent("should return a successful response with a valid API key and valid excludes option", async () => {
|
||||||
const crawlResponse = await request(TEST_URL)
|
const crawlResponse = await request(TEST_URL)
|
||||||
.post("/v0/crawl")
|
.post("/v0/crawl")
|
||||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||||
|
@ -314,7 +314,7 @@ describe("E2E Tests for API Routes", () => {
|
||||||
});
|
});
|
||||||
}, 90000); // 90 seconds
|
}, 90000); // 90 seconds
|
||||||
|
|
||||||
it("should return a successful response with a valid API key and limit to 3", async () => {
|
it.concurrent("should return a successful response with a valid API key and limit to 3", async () => {
|
||||||
const crawlResponse = await request(TEST_URL)
|
const crawlResponse = await request(TEST_URL)
|
||||||
.post("/v0/crawl")
|
.post("/v0/crawl")
|
||||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||||
|
@ -354,7 +354,7 @@ describe("E2E Tests for API Routes", () => {
|
||||||
expect(completedResponse.body.data[0].content).toContain("Mendable");
|
expect(completedResponse.body.data[0].content).toContain("Mendable");
|
||||||
}, 60000); // 60 seconds
|
}, 60000); // 60 seconds
|
||||||
|
|
||||||
it("should return a successful response with max depth option for a valid crawl job", async () => {
|
it.concurrent("should return a successful response with max depth option for a valid crawl job", async () => {
|
||||||
const crawlResponse = await request(TEST_URL)
|
const crawlResponse = await request(TEST_URL)
|
||||||
.post("/v0/crawl")
|
.post("/v0/crawl")
|
||||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||||
|
@ -396,7 +396,7 @@ describe("E2E Tests for API Routes", () => {
|
||||||
});
|
});
|
||||||
}, 120000);
|
}, 120000);
|
||||||
|
|
||||||
// it("should return a successful response with a valid API key and valid limit option", async () => {
|
// it.concurrent("should return a successful response with a valid API key and valid limit option", async () => {
|
||||||
// const crawlResponse = await request(TEST_URL)
|
// const crawlResponse = await request(TEST_URL)
|
||||||
// .post("/v0/crawl")
|
// .post("/v0/crawl")
|
||||||
// .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
// .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||||
|
@ -441,13 +441,13 @@ describe("E2E Tests for API Routes", () => {
|
||||||
// expect(completedResponse.body.data[0].content).not.toContain("main menu");
|
// expect(completedResponse.body.data[0].content).not.toContain("main menu");
|
||||||
// }, 60000); // 60 seconds
|
// }, 60000); // 60 seconds
|
||||||
|
|
||||||
it("should return a successful response for a valid crawl job with includeHtml set to true option", async () => {
|
it.concurrent("should return a successful response for a valid crawl job with includeHtml set to true option", async () => {
|
||||||
const crawlResponse = await request(TEST_URL)
|
const crawlResponse = await request(TEST_URL)
|
||||||
.post("/v0/crawl")
|
.post("/v0/crawl")
|
||||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||||
.set("Content-Type", "application/json")
|
.set("Content-Type", "application/json")
|
||||||
.send({
|
.send({
|
||||||
url: "https://firecrawl.dev",
|
url: "https://roastmywebsite.ai",
|
||||||
pageOptions: { includeHtml: true },
|
pageOptions: { includeHtml: true },
|
||||||
});
|
});
|
||||||
expect(crawlResponse.statusCode).toBe(200);
|
expect(crawlResponse.statusCode).toBe(200);
|
||||||
|
@ -486,19 +486,19 @@ describe("E2E Tests for API Routes", () => {
|
||||||
// 120 seconds
|
// 120 seconds
|
||||||
expect(completedResponse.body.data[0]).toHaveProperty("html");
|
expect(completedResponse.body.data[0]).toHaveProperty("html");
|
||||||
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
|
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
|
||||||
expect(completedResponse.body.data[0].content).toContain("🔥 Firecrawl");
|
expect(completedResponse.body.data[0].content).toContain("_Roast_");
|
||||||
expect(completedResponse.body.data[0].markdown).toContain("Firecrawl");
|
expect(completedResponse.body.data[0].markdown).toContain("_Roast_");
|
||||||
expect(completedResponse.body.data[0].html).toContain("<h1");
|
expect(completedResponse.body.data[0].html).toContain("<h1");
|
||||||
}, 60000);
|
}, 60000);
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("POST /v0/crawlWebsitePreview", () => {
|
describe("POST /v0/crawlWebsitePreview", () => {
|
||||||
it("should require authorization", async () => {
|
it.concurrent("should require authorization", async () => {
|
||||||
const response = await request(TEST_URL).post("/v0/crawlWebsitePreview");
|
const response = await request(TEST_URL).post("/v0/crawlWebsitePreview");
|
||||||
expect(response.statusCode).toBe(401);
|
expect(response.statusCode).toBe(401);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("should return an error response with an invalid API key", async () => {
|
it.concurrent("should return an error response with an invalid API key", async () => {
|
||||||
const response = await request(TEST_URL)
|
const response = await request(TEST_URL)
|
||||||
.post("/v0/crawlWebsitePreview")
|
.post("/v0/crawlWebsitePreview")
|
||||||
.set("Authorization", `Bearer invalid-api-key`)
|
.set("Authorization", `Bearer invalid-api-key`)
|
||||||
|
@ -507,7 +507,7 @@ describe("E2E Tests for API Routes", () => {
|
||||||
expect(response.statusCode).toBe(401);
|
expect(response.statusCode).toBe(401);
|
||||||
});
|
});
|
||||||
|
|
||||||
// it("should return an error for a blocklisted URL", async () => {
|
// it.concurrent("should return an error for a blocklisted URL", async () => {
|
||||||
// const blocklistedUrl = "https://instagram.com/fake-test";
|
// const blocklistedUrl = "https://instagram.com/fake-test";
|
||||||
// const response = await request(TEST_URL)
|
// const response = await request(TEST_URL)
|
||||||
// .post("/v0/crawlWebsitePreview")
|
// .post("/v0/crawlWebsitePreview")
|
||||||
|
@ -519,7 +519,7 @@ describe("E2E Tests for API Routes", () => {
|
||||||
// expect(response.body.error).toContain("Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.");
|
// expect(response.body.error).toContain("Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.");
|
||||||
// });
|
// });
|
||||||
|
|
||||||
it("should return a timeout error when scraping takes longer than the specified timeout", async () => {
|
it.concurrent("should return a timeout error when scraping takes longer than the specified timeout", async () => {
|
||||||
const response = await request(TEST_URL)
|
const response = await request(TEST_URL)
|
||||||
.post("/v0/scrape")
|
.post("/v0/scrape")
|
||||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||||
|
@ -529,27 +529,27 @@ describe("E2E Tests for API Routes", () => {
|
||||||
expect(response.statusCode).toBe(408);
|
expect(response.statusCode).toBe(408);
|
||||||
}, 3000);
|
}, 3000);
|
||||||
|
|
||||||
it("should return a successful response with a valid API key for crawlWebsitePreview", async () => {
|
// it.concurrent("should return a successful response with a valid API key for crawlWebsitePreview", async () => {
|
||||||
const response = await request(TEST_URL)
|
// const response = await request(TEST_URL)
|
||||||
.post("/v0/crawlWebsitePreview")
|
// .post("/v0/crawlWebsitePreview")
|
||||||
.set("Authorization", `Bearer this_is_just_a_preview_token`)
|
// .set("Authorization", `Bearer this_is_just_a_preview_token`)
|
||||||
.set("Content-Type", "application/json")
|
// .set("Content-Type", "application/json")
|
||||||
.send({ url: "https://firecrawl.dev" });
|
// .send({ url: "https://firecrawl.dev" });
|
||||||
expect(response.statusCode).toBe(200);
|
// expect(response.statusCode).toBe(200);
|
||||||
expect(response.body).toHaveProperty("jobId");
|
// expect(response.body).toHaveProperty("jobId");
|
||||||
expect(response.body.jobId).toMatch(
|
// expect(response.body.jobId).toMatch(
|
||||||
/^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
|
// /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
|
||||||
);
|
// );
|
||||||
});
|
// });
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("POST /v0/search", () => {
|
describe("POST /v0/search", () => {
|
||||||
it("should require authorization", async () => {
|
it.concurrent("should require authorization", async () => {
|
||||||
const response = await request(TEST_URL).post("/v0/search");
|
const response = await request(TEST_URL).post("/v0/search");
|
||||||
expect(response.statusCode).toBe(401);
|
expect(response.statusCode).toBe(401);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("should return an error response with an invalid API key", async () => {
|
it.concurrent("should return an error response with an invalid API key", async () => {
|
||||||
const response = await request(TEST_URL)
|
const response = await request(TEST_URL)
|
||||||
.post("/v0/search")
|
.post("/v0/search")
|
||||||
.set("Authorization", `Bearer invalid-api-key`)
|
.set("Authorization", `Bearer invalid-api-key`)
|
||||||
|
@ -558,7 +558,7 @@ describe("E2E Tests for API Routes", () => {
|
||||||
expect(response.statusCode).toBe(401);
|
expect(response.statusCode).toBe(401);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("should return a successful response with a valid API key for search", async () => {
|
it.concurrent("should return a successful response with a valid API key for search", async () => {
|
||||||
const response = await request(TEST_URL)
|
const response = await request(TEST_URL)
|
||||||
.post("/v0/search")
|
.post("/v0/search")
|
||||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||||
|
@ -572,31 +572,31 @@ describe("E2E Tests for API Routes", () => {
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("GET /v0/crawl/status/:jobId", () => {
|
describe("GET /v0/crawl/status/:jobId", () => {
|
||||||
it("should require authorization", async () => {
|
it.concurrent("should require authorization", async () => {
|
||||||
const response = await request(TEST_URL).get("/v0/crawl/status/123");
|
const response = await request(TEST_URL).get("/v0/crawl/status/123");
|
||||||
expect(response.statusCode).toBe(401);
|
expect(response.statusCode).toBe(401);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("should return an error response with an invalid API key", async () => {
|
it.concurrent("should return an error response with an invalid API key", async () => {
|
||||||
const response = await request(TEST_URL)
|
const response = await request(TEST_URL)
|
||||||
.get("/v0/crawl/status/123")
|
.get("/v0/crawl/status/123")
|
||||||
.set("Authorization", `Bearer invalid-api-key`);
|
.set("Authorization", `Bearer invalid-api-key`);
|
||||||
expect(response.statusCode).toBe(401);
|
expect(response.statusCode).toBe(401);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("should return Job not found for invalid job ID", async () => {
|
it.concurrent("should return Job not found for invalid job ID", async () => {
|
||||||
const response = await request(TEST_URL)
|
const response = await request(TEST_URL)
|
||||||
.get("/v0/crawl/status/invalidJobId")
|
.get("/v0/crawl/status/invalidJobId")
|
||||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
|
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
|
||||||
expect(response.statusCode).toBe(404);
|
expect(response.statusCode).toBe(404);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("should return a successful crawl status response for a valid crawl job", async () => {
|
it.concurrent("should return a successful crawl status response for a valid crawl job", async () => {
|
||||||
const crawlResponse = await request(TEST_URL)
|
const crawlResponse = await request(TEST_URL)
|
||||||
.post("/v0/crawl")
|
.post("/v0/crawl")
|
||||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||||
.set("Content-Type", "application/json")
|
.set("Content-Type", "application/json")
|
||||||
.send({ url: "https://firecrawl.dev" });
|
.send({ url: "https://roastmywebsite.ai" });
|
||||||
expect(crawlResponse.statusCode).toBe(200);
|
expect(crawlResponse.statusCode).toBe(200);
|
||||||
|
|
||||||
let isCompleted = false;
|
let isCompleted = false;
|
||||||
|
@ -622,10 +622,10 @@ describe("E2E Tests for API Routes", () => {
|
||||||
expect(completedResponse.body.data[0]).toHaveProperty("content");
|
expect(completedResponse.body.data[0]).toHaveProperty("content");
|
||||||
expect(completedResponse.body.data[0]).toHaveProperty("markdown");
|
expect(completedResponse.body.data[0]).toHaveProperty("markdown");
|
||||||
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
|
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
|
||||||
expect(completedResponse.body.data[0].content).toContain("🔥 Firecrawl");
|
expect(completedResponse.body.data[0].content).toContain("_Roast_");
|
||||||
}, 60000); // 60 seconds
|
}, 120000); // 120 seconds
|
||||||
|
|
||||||
it('should return a successful response for a valid crawl job with PDF files without explicit .pdf extension', async () => {
|
it.concurrent('should return a successful response for a valid crawl job with PDF files without explicit .pdf extension', async () => {
|
||||||
const crawlResponse = await request(TEST_URL)
|
const crawlResponse = await request(TEST_URL)
|
||||||
.post('/v0/crawl')
|
.post('/v0/crawl')
|
||||||
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
|
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
|
||||||
|
@ -660,9 +660,9 @@ describe("E2E Tests for API Routes", () => {
|
||||||
})
|
})
|
||||||
])
|
])
|
||||||
);
|
);
|
||||||
}, 60000); // 60 seconds
|
}, 120000); // 120 seconds
|
||||||
|
|
||||||
it("should return a successful response with max depth option for a valid crawl job", async () => {
|
it.concurrent("should return a successful response with max depth option for a valid crawl job", async () => {
|
||||||
const crawlResponse = await request(TEST_URL)
|
const crawlResponse = await request(TEST_URL)
|
||||||
.post("/v0/crawl")
|
.post("/v0/crawl")
|
||||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||||
|
@ -705,15 +705,15 @@ describe("E2E Tests for API Routes", () => {
|
||||||
const depth = new URL(url).pathname.split("/").filter(Boolean).length;
|
const depth = new URL(url).pathname.split("/").filter(Boolean).length;
|
||||||
expect(depth).toBeLessThanOrEqual(1);
|
expect(depth).toBeLessThanOrEqual(1);
|
||||||
});
|
});
|
||||||
}, 120000);
|
}, 180000);
|
||||||
|
|
||||||
it("should return a successful response for a valid crawl job with includeHtml set to true option", async () => {
|
it.concurrent("should return a successful response for a valid crawl job with includeHtml set to true option", async () => {
|
||||||
const crawlResponse = await request(TEST_URL)
|
const crawlResponse = await request(TEST_URL)
|
||||||
.post("/v0/crawl")
|
.post("/v0/crawl")
|
||||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||||
.set("Content-Type", "application/json")
|
.set("Content-Type", "application/json")
|
||||||
.send({
|
.send({
|
||||||
url: "https://firecrawl.dev",
|
url: "https://roastmywebsite.ai",
|
||||||
pageOptions: { includeHtml: true },
|
pageOptions: { includeHtml: true },
|
||||||
});
|
});
|
||||||
expect(crawlResponse.statusCode).toBe(200);
|
expect(crawlResponse.statusCode).toBe(200);
|
||||||
|
@ -725,12 +725,23 @@ describe("E2E Tests for API Routes", () => {
|
||||||
expect(response.body).toHaveProperty("status");
|
expect(response.body).toHaveProperty("status");
|
||||||
expect(response.body.status).toBe("active");
|
expect(response.body.status).toBe("active");
|
||||||
|
|
||||||
// wait for 30 seconds
|
let isFinished = false;
|
||||||
await new Promise((r) => setTimeout(r, 30000));
|
let completedResponse;
|
||||||
|
|
||||||
const completedResponse = await request(TEST_URL)
|
while (!isFinished) {
|
||||||
.get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
|
const response = await request(TEST_URL)
|
||||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
|
.get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
|
||||||
|
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
|
||||||
|
expect(response.statusCode).toBe(200);
|
||||||
|
expect(response.body).toHaveProperty("status");
|
||||||
|
|
||||||
|
if (response.body.status === "completed") {
|
||||||
|
isFinished = true;
|
||||||
|
completedResponse = response;
|
||||||
|
} else {
|
||||||
|
await new Promise((r) => setTimeout(r, 1000)); // Wait for 1 second before checking again
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
expect(completedResponse.statusCode).toBe(200);
|
expect(completedResponse.statusCode).toBe(200);
|
||||||
expect(completedResponse.body).toHaveProperty("status");
|
expect(completedResponse.body).toHaveProperty("status");
|
||||||
|
@ -739,17 +750,14 @@ describe("E2E Tests for API Routes", () => {
|
||||||
expect(completedResponse.body.data[0]).toHaveProperty("content");
|
expect(completedResponse.body.data[0]).toHaveProperty("content");
|
||||||
expect(completedResponse.body.data[0]).toHaveProperty("markdown");
|
expect(completedResponse.body.data[0]).toHaveProperty("markdown");
|
||||||
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
|
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
|
||||||
|
|
||||||
// 120 seconds
|
|
||||||
expect(completedResponse.body.data[0]).toHaveProperty("html");
|
expect(completedResponse.body.data[0]).toHaveProperty("html");
|
||||||
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
|
expect(completedResponse.body.data[0].content).toContain("_Roast_");
|
||||||
expect(completedResponse.body.data[0].content).toContain("🔥 Firecrawl");
|
expect(completedResponse.body.data[0].markdown).toContain("_Roast_");
|
||||||
expect(completedResponse.body.data[0].markdown).toContain("Firecrawl");
|
|
||||||
expect(completedResponse.body.data[0].html).toContain("<h1");
|
expect(completedResponse.body.data[0].html).toContain("<h1");
|
||||||
}, 60000);
|
}, 60000);
|
||||||
}); // 60 seconds
|
}); // 60 seconds
|
||||||
|
|
||||||
it("If someone cancels a crawl job, it should turn into failed status", async () => {
|
it.concurrent("If someone cancels a crawl job, it should turn into failed status", async () => {
|
||||||
const crawlResponse = await request(TEST_URL)
|
const crawlResponse = await request(TEST_URL)
|
||||||
.post("/v0/crawl")
|
.post("/v0/crawl")
|
||||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||||
|
@ -785,7 +793,7 @@ describe("E2E Tests for API Routes", () => {
|
||||||
}, 60000); // 60 seconds
|
}, 60000); // 60 seconds
|
||||||
|
|
||||||
describe("POST /v0/scrape with LLM Extraction", () => {
|
describe("POST /v0/scrape with LLM Extraction", () => {
|
||||||
it("should extract data using LLM extraction mode", async () => {
|
it.concurrent("should extract data using LLM extraction mode", async () => {
|
||||||
const response = await request(TEST_URL)
|
const response = await request(TEST_URL)
|
||||||
.post("/v0/scrape")
|
.post("/v0/scrape")
|
||||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||||
|
@ -836,7 +844,7 @@ describe("E2E Tests for API Routes", () => {
|
||||||
});
|
});
|
||||||
|
|
||||||
// describe("POST /v0/scrape for Top 100 Companies", () => {
|
// describe("POST /v0/scrape for Top 100 Companies", () => {
|
||||||
// it("should extract data for the top 100 companies", async () => {
|
// it.concurrent("should extract data for the top 100 companies", async () => {
|
||||||
// const response = await request(TEST_URL)
|
// const response = await request(TEST_URL)
|
||||||
// .post("/v0/scrape")
|
// .post("/v0/scrape")
|
||||||
// .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
// .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||||
|
@ -894,7 +902,7 @@ describe("E2E Tests for API Routes", () => {
|
||||||
// });
|
// });
|
||||||
|
|
||||||
describe("POST /v0/crawl with fast mode", () => {
|
describe("POST /v0/crawl with fast mode", () => {
|
||||||
it("should complete the crawl under 20 seconds", async () => {
|
it.concurrent("should complete the crawl under 20 seconds", async () => {
|
||||||
const startTime = Date.now();
|
const startTime = Date.now();
|
||||||
|
|
||||||
const crawlResponse = await request(TEST_URL)
|
const crawlResponse = await request(TEST_URL)
|
||||||
|
@ -927,10 +935,10 @@ describe("E2E Tests for API Routes", () => {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const endTime = Date.now();
|
// const endTime = Date.now();
|
||||||
const timeElapsed = (endTime - startTime) / 1000; // Convert to seconds
|
// const timeElapsed = (endTime - startTime) / 1000; // Convert to seconds
|
||||||
|
|
||||||
console.log(`Time elapsed: ${timeElapsed} seconds`);
|
// console.log(`Time elapsed: ${timeElapsed} seconds`);
|
||||||
|
|
||||||
expect(statusResponse.body.status).toBe("completed");
|
expect(statusResponse.body.status).toBe("completed");
|
||||||
expect(statusResponse.body).toHaveProperty("data");
|
expect(statusResponse.body).toHaveProperty("data");
|
||||||
|
@ -945,7 +953,7 @@ describe("E2E Tests for API Routes", () => {
|
||||||
|
|
||||||
}, 20000);
|
}, 20000);
|
||||||
|
|
||||||
// it("should complete the crawl in more than 10 seconds", async () => {
|
// it.concurrent("should complete the crawl in more than 10 seconds", async () => {
|
||||||
// const startTime = Date.now();
|
// const startTime = Date.now();
|
||||||
|
|
||||||
// const crawlResponse = await request(TEST_URL)
|
// const crawlResponse = await request(TEST_URL)
|
||||||
|
@ -995,7 +1003,7 @@ describe("E2E Tests for API Routes", () => {
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("GET /is-production", () => {
|
describe("GET /is-production", () => {
|
||||||
it("should return the production status", async () => {
|
it.concurrent("should return the production status", async () => {
|
||||||
const response = await request(TEST_URL).get("/is-production");
|
const response = await request(TEST_URL).get("/is-production");
|
||||||
expect(response.statusCode).toBe(200);
|
expect(response.statusCode).toBe(200);
|
||||||
expect(response.body).toHaveProperty("isProduction");
|
expect(response.body).toHaveProperty("isProduction");
|
||||||
|
@ -1003,8 +1011,8 @@ describe("E2E Tests for API Routes", () => {
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("Rate Limiter", () => {
|
describe("Rate Limiter", () => {
|
||||||
it("should return 429 when rate limit is exceeded for preview token", async () => {
|
it.concurrent("should return 429 when rate limit is exceeded for preview token", async () => {
|
||||||
for (let i = 0; i < 4; i++) {
|
for (let i = 0; i < 5; i++) {
|
||||||
const response = await request(TEST_URL)
|
const response = await request(TEST_URL)
|
||||||
.post("/v0/scrape")
|
.post("/v0/scrape")
|
||||||
.set("Authorization", `Bearer this_is_just_a_preview_token`)
|
.set("Authorization", `Bearer this_is_just_a_preview_token`)
|
||||||
|
@ -1020,10 +1028,10 @@ describe("E2E Tests for API Routes", () => {
|
||||||
.send({ url: "https://www.scrapethissite.com" });
|
.send({ url: "https://www.scrapethissite.com" });
|
||||||
|
|
||||||
expect(response.statusCode).toBe(429);
|
expect(response.statusCode).toBe(429);
|
||||||
}, 60000);
|
}, 90000);
|
||||||
});
|
});
|
||||||
|
|
||||||
// it("should return 429 when rate limit is exceeded for API key", async () => {
|
// it.concurrent("should return 429 when rate limit is exceeded for API key", async () => {
|
||||||
// for (let i = 0; i < parseInt(process.env.RATE_LIMIT_TEST_API_KEY_SCRAPE); i++) {
|
// for (let i = 0; i < parseInt(process.env.RATE_LIMIT_TEST_API_KEY_SCRAPE); i++) {
|
||||||
// const response = await request(TEST_URL)
|
// const response = await request(TEST_URL)
|
||||||
// .post("/v0/scrape")
|
// .post("/v0/scrape")
|
||||||
|
@ -1043,7 +1051,7 @@ describe("E2E Tests for API Routes", () => {
|
||||||
// expect(response.statusCode).toBe(429);
|
// expect(response.statusCode).toBe(429);
|
||||||
// }, 60000);
|
// }, 60000);
|
||||||
|
|
||||||
// it("should return 429 when rate limit is exceeded for API key", async () => {
|
// it.concurrent("should return 429 when rate limit is exceeded for API key", async () => {
|
||||||
// for (let i = 0; i < parseInt(process.env.RATE_LIMIT_TEST_API_KEY_CRAWL); i++) {
|
// for (let i = 0; i < parseInt(process.env.RATE_LIMIT_TEST_API_KEY_CRAWL); i++) {
|
||||||
// const response = await request(TEST_URL)
|
// const response = await request(TEST_URL)
|
||||||
// .post("/v0/crawl")
|
// .post("/v0/crawl")
|
||||||
|
|
|
@ -1,13 +1,13 @@
|
||||||
{
|
{
|
||||||
"name": "@mendable/firecrawl-js",
|
"name": "@mendable/firecrawl-js",
|
||||||
"version": "0.0.23",
|
"version": "0.0.25",
|
||||||
"description": "JavaScript SDK for Firecrawl API",
|
"description": "JavaScript SDK for Firecrawl API",
|
||||||
"main": "build/index.js",
|
"main": "build/index.js",
|
||||||
"types": "types/index.d.ts",
|
"types": "types/index.d.ts",
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"build": "tsc",
|
"build": "tsc",
|
||||||
"publish": "npm run build && npm publish --access public",
|
"build-and-publish": "npm run build && npm publish --access public",
|
||||||
"publish-beta": "npm run build && npm publish --access public --tag beta",
|
"publish-beta": "npm run build && npm publish --access public --tag beta",
|
||||||
"test": "jest src/__tests__/**/*.test.ts"
|
"test": "jest src/__tests__/**/*.test.ts"
|
||||||
},
|
},
|
||||||
|
|
|
@ -8,94 +8,94 @@ const TEST_API_KEY = process.env.TEST_API_KEY;
|
||||||
const API_URL = process.env.API_URL;
|
const API_URL = process.env.API_URL;
|
||||||
|
|
||||||
describe('FirecrawlApp E2E Tests', () => {
|
describe('FirecrawlApp E2E Tests', () => {
|
||||||
test('should throw error for no API key', () => {
|
test.concurrent('should throw error for no API key', () => {
|
||||||
expect(() => {
|
expect(() => {
|
||||||
new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
|
new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
|
||||||
}).toThrow("No API key provided");
|
}).toThrow("No API key provided");
|
||||||
});
|
});
|
||||||
|
|
||||||
test('should throw error for invalid API key on scrape', async () => {
|
test.concurrent('should throw error for invalid API key on scrape', async () => {
|
||||||
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
||||||
await expect(invalidApp.scrapeUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401");
|
await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
|
||||||
});
|
});
|
||||||
|
|
||||||
test('should throw error for blocklisted URL on scrape', async () => {
|
test.concurrent('should throw error for blocklisted URL on scrape', async () => {
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
const blocklistedUrl = "https://facebook.com/fake-test";
|
const blocklistedUrl = "https://facebook.com/fake-test";
|
||||||
await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
|
await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
|
||||||
});
|
});
|
||||||
|
|
||||||
test('should return successful response with valid preview token', async () => {
|
test.concurrent('should return successful response with valid preview token', async () => {
|
||||||
const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL });
|
const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL });
|
||||||
const response = await app.scrapeUrl('https://firecrawl.dev');
|
const response = await app.scrapeUrl('https://roastmywebsite.ai');
|
||||||
expect(response).not.toBeNull();
|
expect(response).not.toBeNull();
|
||||||
expect(response.data.content).toContain("🔥 Firecrawl");
|
expect(response.data.content).toContain("_Roast_");
|
||||||
}, 30000); // 30 seconds timeout
|
}, 30000); // 30 seconds timeout
|
||||||
|
|
||||||
test('should return successful response for valid scrape', async () => {
|
test.concurrent('should return successful response for valid scrape', async () => {
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
const response = await app.scrapeUrl('https://firecrawl.dev');
|
const response = await app.scrapeUrl('https://roastmywebsite.ai');
|
||||||
expect(response).not.toBeNull();
|
expect(response).not.toBeNull();
|
||||||
expect(response.data.content).toContain("🔥 Firecrawl");
|
expect(response.data.content).toContain("_Roast_");
|
||||||
expect(response.data).toHaveProperty('markdown');
|
expect(response.data).toHaveProperty('markdown');
|
||||||
expect(response.data).toHaveProperty('metadata');
|
expect(response.data).toHaveProperty('metadata');
|
||||||
expect(response.data).not.toHaveProperty('html');
|
expect(response.data).not.toHaveProperty('html');
|
||||||
}, 30000); // 30 seconds timeout
|
}, 30000); // 30 seconds timeout
|
||||||
|
|
||||||
test('should return successful response with valid API key and include HTML', async () => {
|
test.concurrent('should return successful response with valid API key and include HTML', async () => {
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
const response = await app.scrapeUrl('https://firecrawl.dev', { pageOptions: { includeHtml: true } });
|
const response = await app.scrapeUrl('https://roastmywebsite.ai', { pageOptions: { includeHtml: true } });
|
||||||
expect(response).not.toBeNull();
|
expect(response).not.toBeNull();
|
||||||
expect(response.data.content).toContain("🔥 Firecrawl");
|
expect(response.data.content).toContain("_Roast_");
|
||||||
expect(response.data.markdown).toContain("🔥 Firecrawl");
|
expect(response.data.markdown).toContain("_Roast_");
|
||||||
expect(response.data.html).toContain("<h1");
|
expect(response.data.html).toContain("<h1");
|
||||||
}, 30000); // 30 seconds timeout
|
}, 30000); // 30 seconds timeout
|
||||||
|
|
||||||
test('should return successful response for valid scrape with PDF file', async () => {
|
test.concurrent('should return successful response for valid scrape with PDF file', async () => {
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf');
|
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf');
|
||||||
expect(response).not.toBeNull();
|
expect(response).not.toBeNull();
|
||||||
expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
||||||
}, 30000); // 30 seconds timeout
|
}, 30000); // 30 seconds timeout
|
||||||
|
|
||||||
test('should return successful response for valid scrape with PDF file without explicit extension', async () => {
|
test.concurrent('should return successful response for valid scrape with PDF file without explicit extension', async () => {
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001');
|
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001');
|
||||||
expect(response).not.toBeNull();
|
expect(response).not.toBeNull();
|
||||||
expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
||||||
}, 30000); // 30 seconds timeout
|
}, 30000); // 30 seconds timeout
|
||||||
|
|
||||||
test('should throw error for invalid API key on crawl', async () => {
|
test.concurrent('should throw error for invalid API key on crawl', async () => {
|
||||||
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
||||||
await expect(invalidApp.crawlUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401");
|
await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
|
||||||
});
|
});
|
||||||
|
|
||||||
test('should throw error for blocklisted URL on crawl', async () => {
|
test.concurrent('should throw error for blocklisted URL on crawl', async () => {
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
const blocklistedUrl = "https://twitter.com/fake-test";
|
const blocklistedUrl = "https://twitter.com/fake-test";
|
||||||
await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
|
await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
|
||||||
});
|
});
|
||||||
|
|
||||||
test('should return successful response for crawl and wait for completion', async () => {
|
test.concurrent('should return successful response for crawl and wait for completion', async () => {
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 30);
|
const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, true, 30);
|
||||||
expect(response).not.toBeNull();
|
expect(response).not.toBeNull();
|
||||||
expect(response[0].content).toContain("🔥 Firecrawl");
|
expect(response[0].content).toContain("_Roast_");
|
||||||
}, 60000); // 60 seconds timeout
|
}, 60000); // 60 seconds timeout
|
||||||
|
|
||||||
test('should handle idempotency key for crawl', async () => {
|
test.concurrent('should handle idempotency key for crawl', async () => {
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
const uniqueIdempotencyKey = uuidv4();
|
const uniqueIdempotencyKey = uuidv4();
|
||||||
const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, false, 2, uniqueIdempotencyKey);
|
const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, false, 2, uniqueIdempotencyKey);
|
||||||
expect(response).not.toBeNull();
|
expect(response).not.toBeNull();
|
||||||
expect(response.jobId).toBeDefined();
|
expect(response.jobId).toBeDefined();
|
||||||
|
|
||||||
await expect(app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
|
await expect(app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
|
||||||
});
|
});
|
||||||
|
|
||||||
test('should check crawl status', async () => {
|
test.concurrent('should check crawl status', async () => {
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, false);
|
const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, false);
|
||||||
expect(response).not.toBeNull();
|
expect(response).not.toBeNull();
|
||||||
expect(response.jobId).toBeDefined();
|
expect(response.jobId).toBeDefined();
|
||||||
|
|
||||||
|
@ -115,7 +115,7 @@ describe('FirecrawlApp E2E Tests', () => {
|
||||||
expect(statusResponse.data.length).toBeGreaterThan(0);
|
expect(statusResponse.data.length).toBeGreaterThan(0);
|
||||||
}, 35000); // 35 seconds timeout
|
}, 35000); // 35 seconds timeout
|
||||||
|
|
||||||
test('should return successful response for search', async () => {
|
test.concurrent('should return successful response for search', async () => {
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
const response = await app.search("test query");
|
const response = await app.search("test query");
|
||||||
expect(response).not.toBeNull();
|
expect(response).not.toBeNull();
|
||||||
|
@ -123,12 +123,12 @@ describe('FirecrawlApp E2E Tests', () => {
|
||||||
expect(response.data.length).toBeGreaterThan(2);
|
expect(response.data.length).toBeGreaterThan(2);
|
||||||
}, 30000); // 30 seconds timeout
|
}, 30000); // 30 seconds timeout
|
||||||
|
|
||||||
test('should throw error for invalid API key on search', async () => {
|
test.concurrent('should throw error for invalid API key on search', async () => {
|
||||||
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
||||||
await expect(invalidApp.search("test query")).rejects.toThrow("Request failed with status code 401");
|
await expect(invalidApp.search("test query")).rejects.toThrow("Request failed with status code 401");
|
||||||
});
|
});
|
||||||
|
|
||||||
test('should perform LLM extraction', async () => {
|
test.concurrent('should perform LLM extraction', async () => {
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
const response = await app.scrapeUrl("https://mendable.ai", {
|
const response = await app.scrapeUrl("https://mendable.ai", {
|
||||||
extractorOptions: {
|
extractorOptions: {
|
||||||
|
|
|
@ -1,3 +1,3 @@
|
||||||
from .firecrawl import FirecrawlApp
|
from .firecrawl import FirecrawlApp
|
||||||
|
|
||||||
__version__ = "0.0.11"
|
__version__ = "0.0.13"
|
||||||
|
|
|
@ -38,31 +38,31 @@ def test_blocklisted_url():
|
||||||
|
|
||||||
def test_successful_response_with_valid_preview_token():
|
def test_successful_response_with_valid_preview_token():
|
||||||
app = FirecrawlApp(api_url=API_URL, api_key="this_is_just_a_preview_token")
|
app = FirecrawlApp(api_url=API_URL, api_key="this_is_just_a_preview_token")
|
||||||
response = app.scrape_url('https://firecrawl.dev')
|
response = app.scrape_url('https://roastmywebsite.ai')
|
||||||
assert response is not None
|
assert response is not None
|
||||||
assert 'content' in response
|
assert 'content' in response
|
||||||
assert "🔥 Firecrawl" in response['content']
|
assert "_Roast_" in response['content']
|
||||||
|
|
||||||
def test_scrape_url_e2e():
|
def test_scrape_url_e2e():
|
||||||
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
||||||
response = app.scrape_url('https://firecrawl.dev')
|
response = app.scrape_url('https://roastmywebsite.ai')
|
||||||
assert response is not None
|
assert response is not None
|
||||||
assert 'content' in response
|
assert 'content' in response
|
||||||
assert 'markdown' in response
|
assert 'markdown' in response
|
||||||
assert 'metadata' in response
|
assert 'metadata' in response
|
||||||
assert 'html' not in response
|
assert 'html' not in response
|
||||||
assert "🔥 Firecrawl" in response['content']
|
assert "_Roast_" in response['content']
|
||||||
|
|
||||||
def test_successful_response_with_valid_api_key_and_include_html():
|
def test_successful_response_with_valid_api_key_and_include_html():
|
||||||
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
||||||
response = app.scrape_url('https://firecrawl.dev', {'pageOptions': {'includeHtml': True}})
|
response = app.scrape_url('https://roastmywebsite.ai', {'pageOptions': {'includeHtml': True}})
|
||||||
assert response is not None
|
assert response is not None
|
||||||
assert 'content' in response
|
assert 'content' in response
|
||||||
assert 'markdown' in response
|
assert 'markdown' in response
|
||||||
assert 'html' in response
|
assert 'html' in response
|
||||||
assert 'metadata' in response
|
assert 'metadata' in response
|
||||||
assert "🔥 Firecrawl" in response['content']
|
assert "_Roast_" in response['content']
|
||||||
assert "🔥 Firecrawl" in response['markdown']
|
assert "_Roast_" in response['markdown']
|
||||||
assert "<h1" in response['html']
|
assert "<h1" in response['html']
|
||||||
|
|
||||||
def test_successful_response_for_valid_scrape_with_pdf_file():
|
def test_successful_response_for_valid_scrape_with_pdf_file():
|
||||||
|
@ -97,20 +97,20 @@ def test_should_return_error_for_blocklisted_url():
|
||||||
|
|
||||||
def test_crawl_url_wait_for_completion_e2e():
|
def test_crawl_url_wait_for_completion_e2e():
|
||||||
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
||||||
response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True)
|
response = app.crawl_url('https://roastmywebsite.ai', {'crawlerOptions': {'excludes': ['blog/*']}}, True)
|
||||||
assert response is not None
|
assert response is not None
|
||||||
assert len(response) > 0
|
assert len(response) > 0
|
||||||
assert 'content' in response[0]
|
assert 'content' in response[0]
|
||||||
assert "🔥 Firecrawl" in response[0]['content']
|
assert "_Roast_" in response[0]['content']
|
||||||
|
|
||||||
def test_crawl_url_with_idempotency_key_e2e():
|
def test_crawl_url_with_idempotency_key_e2e():
|
||||||
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
||||||
uniqueIdempotencyKey = str(uuid4())
|
uniqueIdempotencyKey = str(uuid4())
|
||||||
response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey)
|
response = app.crawl_url('https://roastmywebsite.ai', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey)
|
||||||
assert response is not None
|
assert response is not None
|
||||||
assert len(response) > 0
|
assert len(response) > 0
|
||||||
assert 'content' in response[0]
|
assert 'content' in response[0]
|
||||||
assert "🔥 Firecrawl" in response[0]['content']
|
assert "_Roast_" in response[0]['content']
|
||||||
|
|
||||||
with pytest.raises(Exception) as excinfo:
|
with pytest.raises(Exception) as excinfo:
|
||||||
app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey)
|
app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user