mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 11:42:24 +08:00
Merge pull request #246 from mendableai/194-sdk-ci-pipeline-for-publishing-pythonnode-sdk
[Feat] CI/CD for publishing js and python SDKs
This commit is contained in:
commit
4c3bfe4eb5
16
.github/scripts/check_version_has_incremented.py
vendored
16
.github/scripts/check_version_has_incremented.py
vendored
|
@ -1,24 +1,14 @@
|
|||
"""
|
||||
checks local verions against published verions.
|
||||
checks local versions against published versions.
|
||||
|
||||
# Usage:
|
||||
|
||||
Unix:
|
||||
python .github/scripts/check_version_has_incremented.py js ./apps/js-sdk/firecrawl @mendable/firecrawl-js
|
||||
|
||||
Windows:
|
||||
python .github\scripts\check_version_has_incremented.py js .\apps\js-sdk\firecrawl @mendable/firecrawl-js
|
||||
|
||||
Local version: 0.0.22
|
||||
Published version: 0.0.21
|
||||
true
|
||||
|
||||
Unix:
|
||||
python .github/scripts/check_version_has_incremented.py python ./apps/python-sdk/firecrawl firecrawl-py
|
||||
|
||||
Windows:
|
||||
python .github\scripts\check_version_has_incremented.py python .\apps\python-sdk\firecrawl firecrawl-py
|
||||
|
||||
Local version: 0.0.11
|
||||
Published version: 0.0.11
|
||||
false
|
||||
|
@ -88,8 +78,8 @@ if __name__ == "__main__":
|
|||
raise ValueError("Invalid package type. Use 'python' or 'js'.")
|
||||
|
||||
# Print versions for debugging
|
||||
print(f"Local version: {current_version}")
|
||||
print(f"Published version: {published_version}")
|
||||
# print(f"Local version: {current_version}")
|
||||
# print(f"Published version: {published_version}")
|
||||
|
||||
# Compare versions and print result
|
||||
if is_version_incremented(current_version, published_version):
|
||||
|
|
113
.github/workflows/fly.yml
vendored
113
.github/workflows/fly.yml
vendored
|
@ -3,8 +3,6 @@ on:
|
|||
push:
|
||||
branches:
|
||||
- main
|
||||
schedule:
|
||||
- cron: '0 */2 * * *'
|
||||
|
||||
env:
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
|
@ -25,9 +23,12 @@ env:
|
|||
SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }}
|
||||
SUPABASE_URL: ${{ secrets.SUPABASE_URL }}
|
||||
TEST_API_KEY: ${{ secrets.TEST_API_KEY }}
|
||||
PYPI_USERNAME: ${{ secrets.PYPI_USERNAME }}
|
||||
PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
|
||||
NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
|
||||
|
||||
jobs:
|
||||
pre-deploy:
|
||||
pre-deploy-e2e-tests:
|
||||
name: Pre-deploy checks
|
||||
runs-on: ubuntu-latest
|
||||
services:
|
||||
|
@ -61,7 +62,7 @@ jobs:
|
|||
|
||||
pre-deploy-test-suite:
|
||||
name: Test Suite
|
||||
needs: pre-deploy
|
||||
needs: pre-deploy-e2e-tests
|
||||
runs-on: ubuntu-latest
|
||||
services:
|
||||
redis:
|
||||
|
@ -94,10 +95,17 @@ jobs:
|
|||
run: |
|
||||
npm run test
|
||||
working-directory: ./apps/test-suite
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
|
||||
python-sdk-tests:
|
||||
name: Python SDK Tests
|
||||
needs: pre-deploy-e2e-tests
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
python-version: '3.x'
|
||||
- name: Install Python dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
|
@ -107,6 +115,17 @@ jobs:
|
|||
run: |
|
||||
pytest firecrawl/__tests__/e2e_withAuth/test.py
|
||||
working-directory: ./apps/python-sdk
|
||||
|
||||
js-sdk-tests:
|
||||
name: JavaScript SDK Tests
|
||||
needs: pre-deploy-e2e-tests
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Node.js
|
||||
uses: actions/setup-node@v3
|
||||
with:
|
||||
node-version: "20"
|
||||
- name: Install dependencies for JavaScript SDK
|
||||
run: pnpm install
|
||||
working-directory: ./apps/js-sdk/firecrawl
|
||||
|
@ -117,7 +136,7 @@ jobs:
|
|||
deploy:
|
||||
name: Deploy app
|
||||
runs-on: ubuntu-latest
|
||||
needs: pre-deploy-test-suite
|
||||
needs: [pre-deploy-test-suite, python-sdk-tests, js-sdk-tests]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Change directory
|
||||
|
@ -126,3 +145,83 @@ jobs:
|
|||
- run: flyctl deploy ./apps/api --remote-only -a firecrawl-scraper-js
|
||||
env:
|
||||
FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
|
||||
|
||||
build-and-publish-python-sdk:
|
||||
runs-on: ubuntu-latest
|
||||
needs: deploy
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.x'
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install setuptools wheel twine build requests packaging
|
||||
|
||||
- name: Run version check script
|
||||
id: version_check_script
|
||||
run: |
|
||||
PYTHON_SDK_VERSION_INCREMENTED=$(python .github/scripts/check_version_has_incremented.py python ./apps/python-sdk/firecrawl firecrawl-py)
|
||||
echo "PYTHON_SDK_VERSION_INCREMENTED=$PYTHON_SDK_VERSION_INCREMENTED" >> $GITHUB_ENV
|
||||
|
||||
- name: Build the package
|
||||
if: ${{ env.PYTHON_SDK_VERSION_INCREMENTED == 'true' }}
|
||||
run: |
|
||||
python -m build
|
||||
working-directory: ./apps/python-sdk
|
||||
|
||||
- name: Publish to PyPI
|
||||
if: ${{ env.PYTHON_SDK_VERSION_INCREMENTED == 'true' }}
|
||||
env:
|
||||
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
|
||||
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
|
||||
run: |
|
||||
twine upload dist/*
|
||||
working-directory: ./apps/python-sdk
|
||||
|
||||
build-and-publish-js-sdk:
|
||||
runs-on: ubuntu-latest
|
||||
needs: deploy
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Node.js
|
||||
uses: actions/setup-node@v3
|
||||
with:
|
||||
node-version: '20'
|
||||
registry-url: 'https://registry.npmjs.org/'
|
||||
scope: '@mendable'
|
||||
always-auth: true
|
||||
|
||||
- name: Install pnpm
|
||||
run: npm install -g pnpm
|
||||
|
||||
- name: Install python for running version check script
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install setuptools wheel requests packaging
|
||||
|
||||
- name: Install dependencies for JavaScript SDK
|
||||
run: pnpm install
|
||||
working-directory: ./apps/js-sdk/firecrawl
|
||||
|
||||
- name: Run version check script
|
||||
id: version_check_script
|
||||
run: |
|
||||
VERSION_INCREMENTED=$(python .github/scripts/check_version_has_incremented.py js ./apps/js-sdk/firecrawl @mendable/firecrawl-js)
|
||||
echo "VERSION_INCREMENTED=$VERSION_INCREMENTED" >> $GITHUB_ENV
|
||||
|
||||
- name: Build and publish to npm
|
||||
if: ${{ env.VERSION_INCREMENTED == 'true' }}
|
||||
env:
|
||||
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
|
||||
run: |
|
||||
npm run build-and-publish
|
||||
working-directory: ./apps/js-sdk/firecrawl
|
||||
|
6
.github/workflows/js-sdk.yml
vendored
6
.github/workflows/js-sdk.yml
vendored
|
@ -1,9 +1,7 @@
|
|||
name: Run JavaScript SDK E2E Tests
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
on: []
|
||||
|
||||
env:
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }}
|
||||
|
|
46
.github/workflows/publish-js-sdk.yml
vendored
Normal file
46
.github/workflows/publish-js-sdk.yml
vendored
Normal file
|
@ -0,0 +1,46 @@
|
|||
name: Publish JavaScript SDK
|
||||
|
||||
on: []
|
||||
|
||||
env:
|
||||
NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
|
||||
|
||||
jobs:
|
||||
build-and-publish:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Node.js
|
||||
uses: actions/setup-node@v3
|
||||
with:
|
||||
node-version: '20'
|
||||
registry-url: 'https://registry.npmjs.org/'
|
||||
scope: '@mendable'
|
||||
always-auth: true
|
||||
|
||||
- name: Install pnpm
|
||||
run: npm install -g pnpm
|
||||
|
||||
- name: Install python for running version check script
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install setuptools wheel requests packaging
|
||||
|
||||
- name: Install dependencies for JavaScript SDK
|
||||
run: pnpm install
|
||||
working-directory: ./apps/js-sdk/firecrawl
|
||||
|
||||
- name: Run version check script
|
||||
id: version_check_script
|
||||
run: |
|
||||
VERSION_INCREMENTED=$(python .github/scripts/check_version_has_incremented.py js ./apps/js-sdk/firecrawl @mendable/firecrawl-js)
|
||||
echo "VERSION_INCREMENTED=$VERSION_INCREMENTED" >> $GITHUB_ENV
|
||||
|
||||
- name: Build and publish to npm
|
||||
if: ${{ env.VERSION_INCREMENTED == 'true' }}
|
||||
env:
|
||||
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
|
||||
run: |
|
||||
npm run build-and-publish
|
||||
working-directory: ./apps/js-sdk/firecrawl
|
47
.github/workflows/publish-python-sdk.yml
vendored
Normal file
47
.github/workflows/publish-python-sdk.yml
vendored
Normal file
|
@ -0,0 +1,47 @@
|
|||
name: Publish Python SDK
|
||||
|
||||
on: []
|
||||
|
||||
env:
|
||||
PYPI_USERNAME: ${{ secrets.PYPI_USERNAME }}
|
||||
PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
|
||||
|
||||
jobs:
|
||||
build-and-publish:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.x'
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install setuptools wheel twine build requests packaging
|
||||
|
||||
- name: Run version check script
|
||||
id: version_check_script
|
||||
run: |
|
||||
VERSION_INCREMENTED=$(python .github/scripts/check_version_has_incremented.py python ./apps/python-sdk/firecrawl firecrawl-py)
|
||||
echo "VERSION_INCREMENTED=$VERSION_INCREMENTED" >> $GITHUB_ENV
|
||||
|
||||
- name: Build the package
|
||||
if: ${{ env.VERSION_INCREMENTED == 'true' }}
|
||||
run: |
|
||||
python -m build
|
||||
working-directory: ./apps/python-sdk
|
||||
|
||||
- name: Publish to PyPI
|
||||
if: ${{ env.VERSION_INCREMENTED == 'true' }}
|
||||
env:
|
||||
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
|
||||
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
|
||||
run: |
|
||||
twine upload dist/*
|
||||
working-directory: ./apps/python-sdk
|
||||
|
6
.github/workflows/python-sdk.yml
vendored
6
.github/workflows/python-sdk.yml
vendored
|
@ -1,9 +1,7 @@
|
|||
name: Run Python SDK E2E Tests
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
on: []
|
||||
|
||||
env:
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }}
|
||||
|
|
|
@ -17,7 +17,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
delete process.env.USE_DB_AUTHENTICATION;
|
||||
});
|
||||
describe("GET /", () => {
|
||||
it("should return Hello, world! message", async () => {
|
||||
it.concurrent("should return Hello, world! message", async () => {
|
||||
const response = await request(TEST_URL).get("/");
|
||||
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
@ -26,7 +26,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
});
|
||||
|
||||
describe("GET /test", () => {
|
||||
it("should return Hello, world! message", async () => {
|
||||
it.concurrent("should return Hello, world! message", async () => {
|
||||
const response = await request(TEST_URL).get("/test");
|
||||
expect(response.statusCode).toBe(200);
|
||||
expect(response.text).toContain("Hello, world!");
|
||||
|
@ -34,12 +34,12 @@ describe("E2E Tests for API Routes", () => {
|
|||
});
|
||||
|
||||
describe("POST /v0/scrape", () => {
|
||||
it("should require authorization", async () => {
|
||||
it.concurrent("should require authorization", async () => {
|
||||
const response = await request(app).post("/v0/scrape");
|
||||
expect(response.statusCode).toBe(401);
|
||||
});
|
||||
|
||||
it("should return an error response with an invalid API key", async () => {
|
||||
it.concurrent("should return an error response with an invalid API key", async () => {
|
||||
const response = await request(TEST_URL)
|
||||
.post("/v0/scrape")
|
||||
.set("Authorization", `Bearer invalid-api-key`)
|
||||
|
@ -48,7 +48,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
expect(response.statusCode).toBe(401);
|
||||
});
|
||||
|
||||
it("should return an error for a blocklisted URL", async () => {
|
||||
it.concurrent("should return an error for a blocklisted URL", async () => {
|
||||
const blocklistedUrl = "https://facebook.com/fake-test";
|
||||
const response = await request(TEST_URL)
|
||||
.post("/v0/scrape")
|
||||
|
@ -61,37 +61,38 @@ describe("E2E Tests for API Routes", () => {
|
|||
);
|
||||
});
|
||||
|
||||
it("should return a successful response with a valid preview token", async () => {
|
||||
const response = await request(TEST_URL)
|
||||
.post("/v0/scrape")
|
||||
.set("Authorization", `Bearer this_is_just_a_preview_token`)
|
||||
.set("Content-Type", "application/json")
|
||||
.send({ url: "https://roastmywebsite.ai" });
|
||||
expect(response.statusCode).toBe(200);
|
||||
}, 30000); // 30 seconds timeout
|
||||
// tested on rate limit test
|
||||
// it.concurrent("should return a successful response with a valid preview token", async () => {
|
||||
// const response = await request(TEST_URL)
|
||||
// .post("/v0/scrape")
|
||||
// .set("Authorization", `Bearer this_is_just_a_preview_token`)
|
||||
// .set("Content-Type", "application/json")
|
||||
// .send({ url: "https://roastmywebsite.ai" });
|
||||
// expect(response.statusCode).toBe(200);
|
||||
// }, 30000); // 30 seconds timeout
|
||||
|
||||
it("should return a successful response with a valid API key", async () => {
|
||||
it.concurrent("should return a successful response with a valid API key", async () => {
|
||||
const response = await request(TEST_URL)
|
||||
.post("/v0/scrape")
|
||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||
.set("Content-Type", "application/json")
|
||||
.send({ url: "https://firecrawl.dev" });
|
||||
.send({ url: "https://roastmywebsite.ai" });
|
||||
expect(response.statusCode).toBe(200);
|
||||
expect(response.body).toHaveProperty("data");
|
||||
expect(response.body.data).toHaveProperty("content");
|
||||
expect(response.body.data).toHaveProperty("markdown");
|
||||
expect(response.body.data).toHaveProperty("metadata");
|
||||
expect(response.body.data).not.toHaveProperty("html");
|
||||
expect(response.body.data.content).toContain("🔥 Firecrawl");
|
||||
expect(response.body.data.content).toContain("_Roast_");
|
||||
}, 30000); // 30 seconds timeout
|
||||
|
||||
it("should return a successful response with a valid API key and includeHtml set to true", async () => {
|
||||
it.concurrent("should return a successful response with a valid API key and includeHtml set to true", async () => {
|
||||
const response = await request(TEST_URL)
|
||||
.post("/v0/scrape")
|
||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||
.set("Content-Type", "application/json")
|
||||
.send({
|
||||
url: "https://firecrawl.dev",
|
||||
url: "https://roastmywebsite.ai",
|
||||
pageOptions: { includeHtml: true },
|
||||
});
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
@ -100,12 +101,12 @@ describe("E2E Tests for API Routes", () => {
|
|||
expect(response.body.data).toHaveProperty("markdown");
|
||||
expect(response.body.data).toHaveProperty("html");
|
||||
expect(response.body.data).toHaveProperty("metadata");
|
||||
expect(response.body.data.content).toContain("🔥 Firecrawl");
|
||||
expect(response.body.data.markdown).toContain("🔥 Firecrawl");
|
||||
expect(response.body.data.content).toContain("_Roast_");
|
||||
expect(response.body.data.markdown).toContain("_Roast_");
|
||||
expect(response.body.data.html).toContain("<h1");
|
||||
}, 30000); // 30 seconds timeout
|
||||
|
||||
it('should return a successful response for a valid scrape with PDF file', async () => {
|
||||
it.concurrent('should return a successful response for a valid scrape with PDF file', async () => {
|
||||
const response = await request(TEST_URL)
|
||||
.post('/v0/scrape')
|
||||
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
|
||||
|
@ -120,7 +121,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
expect(response.body.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
||||
}, 60000); // 60 seconds
|
||||
|
||||
it('should return a successful response for a valid scrape with PDF file without explicit .pdf extension', async () => {
|
||||
it.concurrent('should return a successful response for a valid scrape with PDF file without explicit .pdf extension', async () => {
|
||||
const response = await request(TEST_URL)
|
||||
.post('/v0/scrape')
|
||||
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
|
||||
|
@ -136,7 +137,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
}, 60000); // 60 seconds
|
||||
|
||||
// TODO: add this test back once we nail the waitFor option to be more deterministic
|
||||
// it("should return a successful response with a valid API key and waitFor option", async () => {
|
||||
// it.concurrent("should return a successful response with a valid API key and waitFor option", async () => {
|
||||
// const startTime = Date.now();
|
||||
// const response = await request(TEST_URL)
|
||||
// .post("/v0/scrape")
|
||||
|
@ -158,12 +159,12 @@ describe("E2E Tests for API Routes", () => {
|
|||
});
|
||||
|
||||
describe("POST /v0/crawl", () => {
|
||||
it("should require authorization", async () => {
|
||||
it.concurrent("should require authorization", async () => {
|
||||
const response = await request(TEST_URL).post("/v0/crawl");
|
||||
expect(response.statusCode).toBe(401);
|
||||
});
|
||||
|
||||
it("should return an error response with an invalid API key", async () => {
|
||||
it.concurrent("should return an error response with an invalid API key", async () => {
|
||||
const response = await request(TEST_URL)
|
||||
.post("/v0/crawl")
|
||||
.set("Authorization", `Bearer invalid-api-key`)
|
||||
|
@ -172,7 +173,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
expect(response.statusCode).toBe(401);
|
||||
});
|
||||
|
||||
it("should return an error for a blocklisted URL", async () => {
|
||||
it.concurrent("should return an error for a blocklisted URL", async () => {
|
||||
const blocklistedUrl = "https://twitter.com/fake-test";
|
||||
const response = await request(TEST_URL)
|
||||
.post("/v0/crawl")
|
||||
|
@ -185,7 +186,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
);
|
||||
});
|
||||
|
||||
it("should return a successful response with a valid API key for crawl", async () => {
|
||||
it.concurrent("should return a successful response with a valid API key for crawl", async () => {
|
||||
const response = await request(TEST_URL)
|
||||
.post("/v0/crawl")
|
||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||
|
@ -197,7 +198,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
/^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
|
||||
);
|
||||
});
|
||||
it('should prevent duplicate requests using the same idempotency key', async () => {
|
||||
it.concurrent('should prevent duplicate requests using the same idempotency key', async () => {
|
||||
const uniqueIdempotencyKey = uuidv4();
|
||||
|
||||
// First request with the idempotency key
|
||||
|
@ -222,7 +223,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
expect(secondResponse.body.error).toBe('Idempotency key already used');
|
||||
});
|
||||
|
||||
it("should return a successful response with a valid API key and valid includes option", async () => {
|
||||
it.concurrent("should return a successful response with a valid API key and valid includes option", async () => {
|
||||
const crawlResponse = await request(TEST_URL)
|
||||
.post("/v0/crawl")
|
||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||
|
@ -259,7 +260,6 @@ describe("E2E Tests for API Routes", () => {
|
|||
);
|
||||
expect(urls.length).toBeGreaterThan(5);
|
||||
urls.forEach((url: string) => {
|
||||
console.log({url})
|
||||
expect(url.startsWith("https://www.mendable.ai/blog/")).toBeTruthy();
|
||||
});
|
||||
|
||||
|
@ -273,7 +273,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
expect(completedResponse.body.data[0].content).toContain("Mendable");
|
||||
}, 60000); // 60 seconds
|
||||
|
||||
it("should return a successful response with a valid API key and valid excludes option", async () => {
|
||||
it.concurrent("should return a successful response with a valid API key and valid excludes option", async () => {
|
||||
const crawlResponse = await request(TEST_URL)
|
||||
.post("/v0/crawl")
|
||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||
|
@ -314,7 +314,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
});
|
||||
}, 90000); // 90 seconds
|
||||
|
||||
it("should return a successful response with a valid API key and limit to 3", async () => {
|
||||
it.concurrent("should return a successful response with a valid API key and limit to 3", async () => {
|
||||
const crawlResponse = await request(TEST_URL)
|
||||
.post("/v0/crawl")
|
||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||
|
@ -354,7 +354,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
expect(completedResponse.body.data[0].content).toContain("Mendable");
|
||||
}, 60000); // 60 seconds
|
||||
|
||||
it("should return a successful response with max depth option for a valid crawl job", async () => {
|
||||
it.concurrent("should return a successful response with max depth option for a valid crawl job", async () => {
|
||||
const crawlResponse = await request(TEST_URL)
|
||||
.post("/v0/crawl")
|
||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||
|
@ -396,7 +396,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
});
|
||||
}, 120000);
|
||||
|
||||
// it("should return a successful response with a valid API key and valid limit option", async () => {
|
||||
// it.concurrent("should return a successful response with a valid API key and valid limit option", async () => {
|
||||
// const crawlResponse = await request(TEST_URL)
|
||||
// .post("/v0/crawl")
|
||||
// .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||
|
@ -441,13 +441,13 @@ describe("E2E Tests for API Routes", () => {
|
|||
// expect(completedResponse.body.data[0].content).not.toContain("main menu");
|
||||
// }, 60000); // 60 seconds
|
||||
|
||||
it("should return a successful response for a valid crawl job with includeHtml set to true option", async () => {
|
||||
it.concurrent("should return a successful response for a valid crawl job with includeHtml set to true option", async () => {
|
||||
const crawlResponse = await request(TEST_URL)
|
||||
.post("/v0/crawl")
|
||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||
.set("Content-Type", "application/json")
|
||||
.send({
|
||||
url: "https://firecrawl.dev",
|
||||
url: "https://roastmywebsite.ai",
|
||||
pageOptions: { includeHtml: true },
|
||||
});
|
||||
expect(crawlResponse.statusCode).toBe(200);
|
||||
|
@ -486,19 +486,19 @@ describe("E2E Tests for API Routes", () => {
|
|||
// 120 seconds
|
||||
expect(completedResponse.body.data[0]).toHaveProperty("html");
|
||||
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
|
||||
expect(completedResponse.body.data[0].content).toContain("🔥 Firecrawl");
|
||||
expect(completedResponse.body.data[0].markdown).toContain("Firecrawl");
|
||||
expect(completedResponse.body.data[0].content).toContain("_Roast_");
|
||||
expect(completedResponse.body.data[0].markdown).toContain("_Roast_");
|
||||
expect(completedResponse.body.data[0].html).toContain("<h1");
|
||||
}, 60000);
|
||||
});
|
||||
|
||||
describe("POST /v0/crawlWebsitePreview", () => {
|
||||
it("should require authorization", async () => {
|
||||
it.concurrent("should require authorization", async () => {
|
||||
const response = await request(TEST_URL).post("/v0/crawlWebsitePreview");
|
||||
expect(response.statusCode).toBe(401);
|
||||
});
|
||||
|
||||
it("should return an error response with an invalid API key", async () => {
|
||||
it.concurrent("should return an error response with an invalid API key", async () => {
|
||||
const response = await request(TEST_URL)
|
||||
.post("/v0/crawlWebsitePreview")
|
||||
.set("Authorization", `Bearer invalid-api-key`)
|
||||
|
@ -507,7 +507,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
expect(response.statusCode).toBe(401);
|
||||
});
|
||||
|
||||
// it("should return an error for a blocklisted URL", async () => {
|
||||
// it.concurrent("should return an error for a blocklisted URL", async () => {
|
||||
// const blocklistedUrl = "https://instagram.com/fake-test";
|
||||
// const response = await request(TEST_URL)
|
||||
// .post("/v0/crawlWebsitePreview")
|
||||
|
@ -519,7 +519,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
// expect(response.body.error).toContain("Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.");
|
||||
// });
|
||||
|
||||
it("should return a timeout error when scraping takes longer than the specified timeout", async () => {
|
||||
it.concurrent("should return a timeout error when scraping takes longer than the specified timeout", async () => {
|
||||
const response = await request(TEST_URL)
|
||||
.post("/v0/scrape")
|
||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||
|
@ -529,27 +529,27 @@ describe("E2E Tests for API Routes", () => {
|
|||
expect(response.statusCode).toBe(408);
|
||||
}, 3000);
|
||||
|
||||
it("should return a successful response with a valid API key for crawlWebsitePreview", async () => {
|
||||
const response = await request(TEST_URL)
|
||||
.post("/v0/crawlWebsitePreview")
|
||||
.set("Authorization", `Bearer this_is_just_a_preview_token`)
|
||||
.set("Content-Type", "application/json")
|
||||
.send({ url: "https://firecrawl.dev" });
|
||||
expect(response.statusCode).toBe(200);
|
||||
expect(response.body).toHaveProperty("jobId");
|
||||
expect(response.body.jobId).toMatch(
|
||||
/^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
|
||||
);
|
||||
});
|
||||
// it.concurrent("should return a successful response with a valid API key for crawlWebsitePreview", async () => {
|
||||
// const response = await request(TEST_URL)
|
||||
// .post("/v0/crawlWebsitePreview")
|
||||
// .set("Authorization", `Bearer this_is_just_a_preview_token`)
|
||||
// .set("Content-Type", "application/json")
|
||||
// .send({ url: "https://firecrawl.dev" });
|
||||
// expect(response.statusCode).toBe(200);
|
||||
// expect(response.body).toHaveProperty("jobId");
|
||||
// expect(response.body.jobId).toMatch(
|
||||
// /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
|
||||
// );
|
||||
// });
|
||||
});
|
||||
|
||||
describe("POST /v0/search", () => {
|
||||
it("should require authorization", async () => {
|
||||
it.concurrent("should require authorization", async () => {
|
||||
const response = await request(TEST_URL).post("/v0/search");
|
||||
expect(response.statusCode).toBe(401);
|
||||
});
|
||||
|
||||
it("should return an error response with an invalid API key", async () => {
|
||||
it.concurrent("should return an error response with an invalid API key", async () => {
|
||||
const response = await request(TEST_URL)
|
||||
.post("/v0/search")
|
||||
.set("Authorization", `Bearer invalid-api-key`)
|
||||
|
@ -558,7 +558,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
expect(response.statusCode).toBe(401);
|
||||
});
|
||||
|
||||
it("should return a successful response with a valid API key for search", async () => {
|
||||
it.concurrent("should return a successful response with a valid API key for search", async () => {
|
||||
const response = await request(TEST_URL)
|
||||
.post("/v0/search")
|
||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||
|
@ -572,31 +572,31 @@ describe("E2E Tests for API Routes", () => {
|
|||
});
|
||||
|
||||
describe("GET /v0/crawl/status/:jobId", () => {
|
||||
it("should require authorization", async () => {
|
||||
it.concurrent("should require authorization", async () => {
|
||||
const response = await request(TEST_URL).get("/v0/crawl/status/123");
|
||||
expect(response.statusCode).toBe(401);
|
||||
});
|
||||
|
||||
it("should return an error response with an invalid API key", async () => {
|
||||
it.concurrent("should return an error response with an invalid API key", async () => {
|
||||
const response = await request(TEST_URL)
|
||||
.get("/v0/crawl/status/123")
|
||||
.set("Authorization", `Bearer invalid-api-key`);
|
||||
expect(response.statusCode).toBe(401);
|
||||
});
|
||||
|
||||
it("should return Job not found for invalid job ID", async () => {
|
||||
it.concurrent("should return Job not found for invalid job ID", async () => {
|
||||
const response = await request(TEST_URL)
|
||||
.get("/v0/crawl/status/invalidJobId")
|
||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
|
||||
expect(response.statusCode).toBe(404);
|
||||
});
|
||||
|
||||
it("should return a successful crawl status response for a valid crawl job", async () => {
|
||||
it.concurrent("should return a successful crawl status response for a valid crawl job", async () => {
|
||||
const crawlResponse = await request(TEST_URL)
|
||||
.post("/v0/crawl")
|
||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||
.set("Content-Type", "application/json")
|
||||
.send({ url: "https://firecrawl.dev" });
|
||||
.send({ url: "https://roastmywebsite.ai" });
|
||||
expect(crawlResponse.statusCode).toBe(200);
|
||||
|
||||
let isCompleted = false;
|
||||
|
@ -622,10 +622,10 @@ describe("E2E Tests for API Routes", () => {
|
|||
expect(completedResponse.body.data[0]).toHaveProperty("content");
|
||||
expect(completedResponse.body.data[0]).toHaveProperty("markdown");
|
||||
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
|
||||
expect(completedResponse.body.data[0].content).toContain("🔥 Firecrawl");
|
||||
}, 60000); // 60 seconds
|
||||
expect(completedResponse.body.data[0].content).toContain("_Roast_");
|
||||
}, 120000); // 120 seconds
|
||||
|
||||
it('should return a successful response for a valid crawl job with PDF files without explicit .pdf extension', async () => {
|
||||
it.concurrent('should return a successful response for a valid crawl job with PDF files without explicit .pdf extension', async () => {
|
||||
const crawlResponse = await request(TEST_URL)
|
||||
.post('/v0/crawl')
|
||||
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
|
||||
|
@ -660,9 +660,9 @@ describe("E2E Tests for API Routes", () => {
|
|||
})
|
||||
])
|
||||
);
|
||||
}, 60000); // 60 seconds
|
||||
}, 120000); // 120 seconds
|
||||
|
||||
it("should return a successful response with max depth option for a valid crawl job", async () => {
|
||||
it.concurrent("should return a successful response with max depth option for a valid crawl job", async () => {
|
||||
const crawlResponse = await request(TEST_URL)
|
||||
.post("/v0/crawl")
|
||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||
|
@ -705,15 +705,15 @@ describe("E2E Tests for API Routes", () => {
|
|||
const depth = new URL(url).pathname.split("/").filter(Boolean).length;
|
||||
expect(depth).toBeLessThanOrEqual(1);
|
||||
});
|
||||
}, 120000);
|
||||
}, 180000);
|
||||
|
||||
it("should return a successful response for a valid crawl job with includeHtml set to true option", async () => {
|
||||
it.concurrent("should return a successful response for a valid crawl job with includeHtml set to true option", async () => {
|
||||
const crawlResponse = await request(TEST_URL)
|
||||
.post("/v0/crawl")
|
||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||
.set("Content-Type", "application/json")
|
||||
.send({
|
||||
url: "https://firecrawl.dev",
|
||||
url: "https://roastmywebsite.ai",
|
||||
pageOptions: { includeHtml: true },
|
||||
});
|
||||
expect(crawlResponse.statusCode).toBe(200);
|
||||
|
@ -725,12 +725,23 @@ describe("E2E Tests for API Routes", () => {
|
|||
expect(response.body).toHaveProperty("status");
|
||||
expect(response.body.status).toBe("active");
|
||||
|
||||
// wait for 30 seconds
|
||||
await new Promise((r) => setTimeout(r, 30000));
|
||||
let isFinished = false;
|
||||
let completedResponse;
|
||||
|
||||
const completedResponse = await request(TEST_URL)
|
||||
while (!isFinished) {
|
||||
const response = await request(TEST_URL)
|
||||
.get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
|
||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
|
||||
expect(response.statusCode).toBe(200);
|
||||
expect(response.body).toHaveProperty("status");
|
||||
|
||||
if (response.body.status === "completed") {
|
||||
isFinished = true;
|
||||
completedResponse = response;
|
||||
} else {
|
||||
await new Promise((r) => setTimeout(r, 1000)); // Wait for 1 second before checking again
|
||||
}
|
||||
}
|
||||
|
||||
expect(completedResponse.statusCode).toBe(200);
|
||||
expect(completedResponse.body).toHaveProperty("status");
|
||||
|
@ -739,17 +750,14 @@ describe("E2E Tests for API Routes", () => {
|
|||
expect(completedResponse.body.data[0]).toHaveProperty("content");
|
||||
expect(completedResponse.body.data[0]).toHaveProperty("markdown");
|
||||
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
|
||||
|
||||
// 120 seconds
|
||||
expect(completedResponse.body.data[0]).toHaveProperty("html");
|
||||
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
|
||||
expect(completedResponse.body.data[0].content).toContain("🔥 Firecrawl");
|
||||
expect(completedResponse.body.data[0].markdown).toContain("Firecrawl");
|
||||
expect(completedResponse.body.data[0].content).toContain("_Roast_");
|
||||
expect(completedResponse.body.data[0].markdown).toContain("_Roast_");
|
||||
expect(completedResponse.body.data[0].html).toContain("<h1");
|
||||
}, 60000);
|
||||
}); // 60 seconds
|
||||
|
||||
it("If someone cancels a crawl job, it should turn into failed status", async () => {
|
||||
it.concurrent("If someone cancels a crawl job, it should turn into failed status", async () => {
|
||||
const crawlResponse = await request(TEST_URL)
|
||||
.post("/v0/crawl")
|
||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||
|
@ -785,7 +793,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
}, 60000); // 60 seconds
|
||||
|
||||
describe("POST /v0/scrape with LLM Extraction", () => {
|
||||
it("should extract data using LLM extraction mode", async () => {
|
||||
it.concurrent("should extract data using LLM extraction mode", async () => {
|
||||
const response = await request(TEST_URL)
|
||||
.post("/v0/scrape")
|
||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||
|
@ -836,7 +844,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
});
|
||||
|
||||
// describe("POST /v0/scrape for Top 100 Companies", () => {
|
||||
// it("should extract data for the top 100 companies", async () => {
|
||||
// it.concurrent("should extract data for the top 100 companies", async () => {
|
||||
// const response = await request(TEST_URL)
|
||||
// .post("/v0/scrape")
|
||||
// .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||
|
@ -894,7 +902,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
// });
|
||||
|
||||
describe("POST /v0/crawl with fast mode", () => {
|
||||
it("should complete the crawl under 20 seconds", async () => {
|
||||
it.concurrent("should complete the crawl under 20 seconds", async () => {
|
||||
const startTime = Date.now();
|
||||
|
||||
const crawlResponse = await request(TEST_URL)
|
||||
|
@ -927,10 +935,10 @@ describe("E2E Tests for API Routes", () => {
|
|||
}
|
||||
}
|
||||
|
||||
const endTime = Date.now();
|
||||
const timeElapsed = (endTime - startTime) / 1000; // Convert to seconds
|
||||
// const endTime = Date.now();
|
||||
// const timeElapsed = (endTime - startTime) / 1000; // Convert to seconds
|
||||
|
||||
console.log(`Time elapsed: ${timeElapsed} seconds`);
|
||||
// console.log(`Time elapsed: ${timeElapsed} seconds`);
|
||||
|
||||
expect(statusResponse.body.status).toBe("completed");
|
||||
expect(statusResponse.body).toHaveProperty("data");
|
||||
|
@ -945,7 +953,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
|
||||
}, 20000);
|
||||
|
||||
// it("should complete the crawl in more than 10 seconds", async () => {
|
||||
// it.concurrent("should complete the crawl in more than 10 seconds", async () => {
|
||||
// const startTime = Date.now();
|
||||
|
||||
// const crawlResponse = await request(TEST_URL)
|
||||
|
@ -995,7 +1003,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
});
|
||||
|
||||
describe("GET /is-production", () => {
|
||||
it("should return the production status", async () => {
|
||||
it.concurrent("should return the production status", async () => {
|
||||
const response = await request(TEST_URL).get("/is-production");
|
||||
expect(response.statusCode).toBe(200);
|
||||
expect(response.body).toHaveProperty("isProduction");
|
||||
|
@ -1003,8 +1011,8 @@ describe("E2E Tests for API Routes", () => {
|
|||
});
|
||||
|
||||
describe("Rate Limiter", () => {
|
||||
it("should return 429 when rate limit is exceeded for preview token", async () => {
|
||||
for (let i = 0; i < 4; i++) {
|
||||
it.concurrent("should return 429 when rate limit is exceeded for preview token", async () => {
|
||||
for (let i = 0; i < 5; i++) {
|
||||
const response = await request(TEST_URL)
|
||||
.post("/v0/scrape")
|
||||
.set("Authorization", `Bearer this_is_just_a_preview_token`)
|
||||
|
@ -1020,10 +1028,10 @@ describe("E2E Tests for API Routes", () => {
|
|||
.send({ url: "https://www.scrapethissite.com" });
|
||||
|
||||
expect(response.statusCode).toBe(429);
|
||||
}, 60000);
|
||||
}, 90000);
|
||||
});
|
||||
|
||||
// it("should return 429 when rate limit is exceeded for API key", async () => {
|
||||
// it.concurrent("should return 429 when rate limit is exceeded for API key", async () => {
|
||||
// for (let i = 0; i < parseInt(process.env.RATE_LIMIT_TEST_API_KEY_SCRAPE); i++) {
|
||||
// const response = await request(TEST_URL)
|
||||
// .post("/v0/scrape")
|
||||
|
@ -1043,7 +1051,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
// expect(response.statusCode).toBe(429);
|
||||
// }, 60000);
|
||||
|
||||
// it("should return 429 when rate limit is exceeded for API key", async () => {
|
||||
// it.concurrent("should return 429 when rate limit is exceeded for API key", async () => {
|
||||
// for (let i = 0; i < parseInt(process.env.RATE_LIMIT_TEST_API_KEY_CRAWL); i++) {
|
||||
// const response = await request(TEST_URL)
|
||||
// .post("/v0/crawl")
|
||||
|
|
|
@ -1,13 +1,13 @@
|
|||
{
|
||||
"name": "@mendable/firecrawl-js",
|
||||
"version": "0.0.23",
|
||||
"version": "0.0.25",
|
||||
"description": "JavaScript SDK for Firecrawl API",
|
||||
"main": "build/index.js",
|
||||
"types": "types/index.d.ts",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"build": "tsc",
|
||||
"publish": "npm run build && npm publish --access public",
|
||||
"build-and-publish": "npm run build && npm publish --access public",
|
||||
"publish-beta": "npm run build && npm publish --access public --tag beta",
|
||||
"test": "jest src/__tests__/**/*.test.ts"
|
||||
},
|
||||
|
|
|
@ -8,94 +8,94 @@ const TEST_API_KEY = process.env.TEST_API_KEY;
|
|||
const API_URL = process.env.API_URL;
|
||||
|
||||
describe('FirecrawlApp E2E Tests', () => {
|
||||
test('should throw error for no API key', () => {
|
||||
test.concurrent('should throw error for no API key', () => {
|
||||
expect(() => {
|
||||
new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
|
||||
}).toThrow("No API key provided");
|
||||
});
|
||||
|
||||
test('should throw error for invalid API key on scrape', async () => {
|
||||
test.concurrent('should throw error for invalid API key on scrape', async () => {
|
||||
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
||||
await expect(invalidApp.scrapeUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401");
|
||||
await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
|
||||
});
|
||||
|
||||
test('should throw error for blocklisted URL on scrape', async () => {
|
||||
test.concurrent('should throw error for blocklisted URL on scrape', async () => {
|
||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||
const blocklistedUrl = "https://facebook.com/fake-test";
|
||||
await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
|
||||
});
|
||||
|
||||
test('should return successful response with valid preview token', async () => {
|
||||
test.concurrent('should return successful response with valid preview token', async () => {
|
||||
const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL });
|
||||
const response = await app.scrapeUrl('https://firecrawl.dev');
|
||||
const response = await app.scrapeUrl('https://roastmywebsite.ai');
|
||||
expect(response).not.toBeNull();
|
||||
expect(response.data.content).toContain("🔥 Firecrawl");
|
||||
expect(response.data.content).toContain("_Roast_");
|
||||
}, 30000); // 30 seconds timeout
|
||||
|
||||
test('should return successful response for valid scrape', async () => {
|
||||
test.concurrent('should return successful response for valid scrape', async () => {
|
||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||
const response = await app.scrapeUrl('https://firecrawl.dev');
|
||||
const response = await app.scrapeUrl('https://roastmywebsite.ai');
|
||||
expect(response).not.toBeNull();
|
||||
expect(response.data.content).toContain("🔥 Firecrawl");
|
||||
expect(response.data.content).toContain("_Roast_");
|
||||
expect(response.data).toHaveProperty('markdown');
|
||||
expect(response.data).toHaveProperty('metadata');
|
||||
expect(response.data).not.toHaveProperty('html');
|
||||
}, 30000); // 30 seconds timeout
|
||||
|
||||
test('should return successful response with valid API key and include HTML', async () => {
|
||||
test.concurrent('should return successful response with valid API key and include HTML', async () => {
|
||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||
const response = await app.scrapeUrl('https://firecrawl.dev', { pageOptions: { includeHtml: true } });
|
||||
const response = await app.scrapeUrl('https://roastmywebsite.ai', { pageOptions: { includeHtml: true } });
|
||||
expect(response).not.toBeNull();
|
||||
expect(response.data.content).toContain("🔥 Firecrawl");
|
||||
expect(response.data.markdown).toContain("🔥 Firecrawl");
|
||||
expect(response.data.content).toContain("_Roast_");
|
||||
expect(response.data.markdown).toContain("_Roast_");
|
||||
expect(response.data.html).toContain("<h1");
|
||||
}, 30000); // 30 seconds timeout
|
||||
|
||||
test('should return successful response for valid scrape with PDF file', async () => {
|
||||
test.concurrent('should return successful response for valid scrape with PDF file', async () => {
|
||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf');
|
||||
expect(response).not.toBeNull();
|
||||
expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
||||
}, 30000); // 30 seconds timeout
|
||||
|
||||
test('should return successful response for valid scrape with PDF file without explicit extension', async () => {
|
||||
test.concurrent('should return successful response for valid scrape with PDF file without explicit extension', async () => {
|
||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001');
|
||||
expect(response).not.toBeNull();
|
||||
expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
||||
}, 30000); // 30 seconds timeout
|
||||
|
||||
test('should throw error for invalid API key on crawl', async () => {
|
||||
test.concurrent('should throw error for invalid API key on crawl', async () => {
|
||||
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
||||
await expect(invalidApp.crawlUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401");
|
||||
await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
|
||||
});
|
||||
|
||||
test('should throw error for blocklisted URL on crawl', async () => {
|
||||
test.concurrent('should throw error for blocklisted URL on crawl', async () => {
|
||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||
const blocklistedUrl = "https://twitter.com/fake-test";
|
||||
await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
|
||||
});
|
||||
|
||||
test('should return successful response for crawl and wait for completion', async () => {
|
||||
test.concurrent('should return successful response for crawl and wait for completion', async () => {
|
||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||
const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 30);
|
||||
const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, true, 30);
|
||||
expect(response).not.toBeNull();
|
||||
expect(response[0].content).toContain("🔥 Firecrawl");
|
||||
expect(response[0].content).toContain("_Roast_");
|
||||
}, 60000); // 60 seconds timeout
|
||||
|
||||
test('should handle idempotency key for crawl', async () => {
|
||||
test.concurrent('should handle idempotency key for crawl', async () => {
|
||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||
const uniqueIdempotencyKey = uuidv4();
|
||||
const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, false, 2, uniqueIdempotencyKey);
|
||||
const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, false, 2, uniqueIdempotencyKey);
|
||||
expect(response).not.toBeNull();
|
||||
expect(response.jobId).toBeDefined();
|
||||
|
||||
await expect(app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
|
||||
await expect(app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
|
||||
});
|
||||
|
||||
test('should check crawl status', async () => {
|
||||
test.concurrent('should check crawl status', async () => {
|
||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||
const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, false);
|
||||
const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, false);
|
||||
expect(response).not.toBeNull();
|
||||
expect(response.jobId).toBeDefined();
|
||||
|
||||
|
@ -115,7 +115,7 @@ describe('FirecrawlApp E2E Tests', () => {
|
|||
expect(statusResponse.data.length).toBeGreaterThan(0);
|
||||
}, 35000); // 35 seconds timeout
|
||||
|
||||
test('should return successful response for search', async () => {
|
||||
test.concurrent('should return successful response for search', async () => {
|
||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||
const response = await app.search("test query");
|
||||
expect(response).not.toBeNull();
|
||||
|
@ -123,12 +123,12 @@ describe('FirecrawlApp E2E Tests', () => {
|
|||
expect(response.data.length).toBeGreaterThan(2);
|
||||
}, 30000); // 30 seconds timeout
|
||||
|
||||
test('should throw error for invalid API key on search', async () => {
|
||||
test.concurrent('should throw error for invalid API key on search', async () => {
|
||||
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
||||
await expect(invalidApp.search("test query")).rejects.toThrow("Request failed with status code 401");
|
||||
});
|
||||
|
||||
test('should perform LLM extraction', async () => {
|
||||
test.concurrent('should perform LLM extraction', async () => {
|
||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||
const response = await app.scrapeUrl("https://mendable.ai", {
|
||||
extractorOptions: {
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
from .firecrawl import FirecrawlApp
|
||||
|
||||
__version__ = "0.0.11"
|
||||
__version__ = "0.0.13"
|
||||
|
|
|
@ -38,31 +38,31 @@ def test_blocklisted_url():
|
|||
|
||||
def test_successful_response_with_valid_preview_token():
|
||||
app = FirecrawlApp(api_url=API_URL, api_key="this_is_just_a_preview_token")
|
||||
response = app.scrape_url('https://firecrawl.dev')
|
||||
response = app.scrape_url('https://roastmywebsite.ai')
|
||||
assert response is not None
|
||||
assert 'content' in response
|
||||
assert "🔥 Firecrawl" in response['content']
|
||||
assert "_Roast_" in response['content']
|
||||
|
||||
def test_scrape_url_e2e():
|
||||
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
||||
response = app.scrape_url('https://firecrawl.dev')
|
||||
response = app.scrape_url('https://roastmywebsite.ai')
|
||||
assert response is not None
|
||||
assert 'content' in response
|
||||
assert 'markdown' in response
|
||||
assert 'metadata' in response
|
||||
assert 'html' not in response
|
||||
assert "🔥 Firecrawl" in response['content']
|
||||
assert "_Roast_" in response['content']
|
||||
|
||||
def test_successful_response_with_valid_api_key_and_include_html():
|
||||
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
||||
response = app.scrape_url('https://firecrawl.dev', {'pageOptions': {'includeHtml': True}})
|
||||
response = app.scrape_url('https://roastmywebsite.ai', {'pageOptions': {'includeHtml': True}})
|
||||
assert response is not None
|
||||
assert 'content' in response
|
||||
assert 'markdown' in response
|
||||
assert 'html' in response
|
||||
assert 'metadata' in response
|
||||
assert "🔥 Firecrawl" in response['content']
|
||||
assert "🔥 Firecrawl" in response['markdown']
|
||||
assert "_Roast_" in response['content']
|
||||
assert "_Roast_" in response['markdown']
|
||||
assert "<h1" in response['html']
|
||||
|
||||
def test_successful_response_for_valid_scrape_with_pdf_file():
|
||||
|
@ -97,20 +97,20 @@ def test_should_return_error_for_blocklisted_url():
|
|||
|
||||
def test_crawl_url_wait_for_completion_e2e():
|
||||
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
||||
response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True)
|
||||
response = app.crawl_url('https://roastmywebsite.ai', {'crawlerOptions': {'excludes': ['blog/*']}}, True)
|
||||
assert response is not None
|
||||
assert len(response) > 0
|
||||
assert 'content' in response[0]
|
||||
assert "🔥 Firecrawl" in response[0]['content']
|
||||
assert "_Roast_" in response[0]['content']
|
||||
|
||||
def test_crawl_url_with_idempotency_key_e2e():
|
||||
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
||||
uniqueIdempotencyKey = str(uuid4())
|
||||
response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey)
|
||||
response = app.crawl_url('https://roastmywebsite.ai', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey)
|
||||
assert response is not None
|
||||
assert len(response) > 0
|
||||
assert 'content' in response[0]
|
||||
assert "🔥 Firecrawl" in response[0]['content']
|
||||
assert "_Roast_" in response[0]['content']
|
||||
|
||||
with pytest.raises(Exception) as excinfo:
|
||||
app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey)
|
||||
|
|
Loading…
Reference in New Issue
Block a user