mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 11:42:24 +08:00
Added some types
This commit is contained in:
parent
b2bd562bb2
commit
20f14bcf7f
|
@ -1,5 +1,6 @@
|
|||
import request from "supertest";
|
||||
import dotenv from "dotenv";
|
||||
import { FirecrawlCrawlResponse, FirecrawlCrawlStatusResponse, FirecrawlScrapeResponse } from "../../types";
|
||||
|
||||
dotenv.config();
|
||||
const TEST_URL = "http://127.0.0.1:3002";
|
||||
|
@ -23,12 +24,12 @@ describe("E2E Tests for API Routes", () => {
|
|||
|
||||
describe("POST /v0/scrape", () => {
|
||||
it.concurrent("should require authorization", async () => {
|
||||
const response = await request(TEST_URL).post("/v0/scrape");
|
||||
const response: FirecrawlScrapeResponse = await request(TEST_URL).post("/v0/scrape");
|
||||
expect(response.statusCode).toBe(401);
|
||||
});
|
||||
|
||||
it.concurrent("should return an error response with an invalid API key", async () => {
|
||||
const response = await request(TEST_URL)
|
||||
const response: FirecrawlScrapeResponse = await request(TEST_URL)
|
||||
.post("/v0/scrape")
|
||||
.set("Authorization", `Bearer invalid-api-key`)
|
||||
.set("Content-Type", "application/json")
|
||||
|
@ -37,7 +38,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
});
|
||||
|
||||
it.concurrent("should return a successful response with a valid API key", async () => {
|
||||
const response = await request(TEST_URL)
|
||||
const response: FirecrawlScrapeResponse = await request(TEST_URL)
|
||||
.post("/v0/scrape")
|
||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||
.set("Content-Type", "application/json")
|
||||
|
@ -54,7 +55,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
}, 30000); // 30 seconds timeout
|
||||
|
||||
it.concurrent("should return a successful response with a valid API key and includeHtml set to true", async () => {
|
||||
const response = await request(TEST_URL)
|
||||
const response: FirecrawlScrapeResponse = await request(TEST_URL)
|
||||
.post("/v0/scrape")
|
||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||
.set("Content-Type", "application/json")
|
||||
|
@ -76,7 +77,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
}, 30000); // 30 seconds timeout
|
||||
|
||||
it.concurrent('should return a successful response for a valid scrape with PDF file', async () => {
|
||||
const response = await request(TEST_URL)
|
||||
const response: FirecrawlScrapeResponse = await request(TEST_URL)
|
||||
.post('/v0/scrape')
|
||||
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
|
||||
.set('Content-Type', 'application/json')
|
||||
|
@ -93,7 +94,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
}, 60000); // 60 seconds
|
||||
|
||||
it.concurrent('should return a successful response for a valid scrape with PDF file without explicit .pdf extension', async () => {
|
||||
const response = await request(TEST_URL)
|
||||
const response: FirecrawlScrapeResponse = await request(TEST_URL)
|
||||
.post('/v0/scrape')
|
||||
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
|
||||
.set('Content-Type', 'application/json')
|
||||
|
@ -110,7 +111,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
}, 60000); // 60 seconds
|
||||
|
||||
it.concurrent("should return a successful response with a valid API key with removeTags option", async () => {
|
||||
const responseWithoutRemoveTags = await request(TEST_URL)
|
||||
const responseWithoutRemoveTags: FirecrawlScrapeResponse = await request(TEST_URL)
|
||||
.post("/v0/scrape")
|
||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||
.set("Content-Type", "application/json")
|
||||
|
@ -126,7 +127,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
expect(responseWithoutRemoveTags.body.data.content).toContain("[Sandbox]("); // .nav
|
||||
expect(responseWithoutRemoveTags.body.data.content).toContain("web scraping"); // strong
|
||||
|
||||
const response = await request(TEST_URL)
|
||||
const response: FirecrawlScrapeResponse = await request(TEST_URL)
|
||||
.post("/v0/scrape")
|
||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||
.set("Content-Type", "application/json")
|
||||
|
@ -144,7 +145,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
}, 30000); // 30 seconds timeout
|
||||
|
||||
it.concurrent('should return a successful response for a scrape with 400 page', async () => {
|
||||
const response = await request(TEST_URL)
|
||||
const response: FirecrawlScrapeResponse = await request(TEST_URL)
|
||||
.post('/v0/scrape')
|
||||
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
|
||||
.set('Content-Type', 'application/json')
|
||||
|
@ -160,7 +161,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
}, 60000); // 60 seconds
|
||||
|
||||
it.concurrent('should return a successful response for a scrape with 401 page', async () => {
|
||||
const response = await request(TEST_URL)
|
||||
const response: FirecrawlScrapeResponse = await request(TEST_URL)
|
||||
.post('/v0/scrape')
|
||||
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
|
||||
.set('Content-Type', 'application/json')
|
||||
|
@ -176,7 +177,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
}, 60000); // 60 seconds
|
||||
|
||||
it.concurrent("should return a successful response for a scrape with 403 page", async () => {
|
||||
const response = await request(TEST_URL)
|
||||
const response: FirecrawlScrapeResponse = await request(TEST_URL)
|
||||
.post('/v0/scrape')
|
||||
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
|
||||
.set('Content-Type', 'application/json')
|
||||
|
@ -192,7 +193,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
}, 60000); // 60 seconds
|
||||
|
||||
it.concurrent('should return a successful response for a scrape with 404 page', async () => {
|
||||
const response = await request(TEST_URL)
|
||||
const response: FirecrawlScrapeResponse = await request(TEST_URL)
|
||||
.post('/v0/scrape')
|
||||
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
|
||||
.set('Content-Type', 'application/json')
|
||||
|
@ -224,7 +225,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
}, 60000); // 60 seconds
|
||||
|
||||
it.concurrent('should return a successful response for a scrape with 500 page', async () => {
|
||||
const response = await request(TEST_URL)
|
||||
const response: FirecrawlScrapeResponse = await request(TEST_URL)
|
||||
.post('/v0/scrape')
|
||||
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
|
||||
.set('Content-Type', 'application/json')
|
||||
|
@ -242,12 +243,12 @@ describe("E2E Tests for API Routes", () => {
|
|||
|
||||
describe("POST /v0/crawl", () => {
|
||||
it.concurrent("should require authorization", async () => {
|
||||
const response = await request(TEST_URL).post("/v0/crawl");
|
||||
const response: FirecrawlCrawlResponse = await request(TEST_URL).post("/v0/crawl");
|
||||
expect(response.statusCode).toBe(401);
|
||||
});
|
||||
|
||||
it.concurrent("should return an error response with an invalid API key", async () => {
|
||||
const response = await request(TEST_URL)
|
||||
const response: FirecrawlCrawlResponse = await request(TEST_URL)
|
||||
.post("/v0/crawl")
|
||||
.set("Authorization", `Bearer invalid-api-key`)
|
||||
.set("Content-Type", "application/json")
|
||||
|
@ -256,7 +257,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
});
|
||||
|
||||
it.concurrent("should return a successful response with a valid API key for crawl", async () => {
|
||||
const response = await request(TEST_URL)
|
||||
const response: FirecrawlCrawlResponse = await request(TEST_URL)
|
||||
.post("/v0/crawl")
|
||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||
.set("Content-Type", "application/json")
|
||||
|
@ -269,7 +270,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
});
|
||||
|
||||
it.concurrent("should return a successful response with a valid API key and valid includes option", async () => {
|
||||
const crawlResponse = await request(TEST_URL)
|
||||
const crawlResponse: FirecrawlCrawlResponse = await request(TEST_URL)
|
||||
.post("/v0/crawl")
|
||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||
.set("Content-Type", "application/json")
|
||||
|
@ -281,7 +282,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
},
|
||||
});
|
||||
|
||||
let response;
|
||||
let response: FirecrawlCrawlStatusResponse;
|
||||
let isFinished = false;
|
||||
|
||||
while (!isFinished) {
|
||||
|
@ -321,7 +322,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
}, 60000); // 60 seconds
|
||||
|
||||
it.concurrent("should return a successful response with a valid API key and valid excludes option", async () => {
|
||||
const crawlResponse = await request(TEST_URL)
|
||||
const crawlResponse: FirecrawlCrawlResponse = await request(TEST_URL)
|
||||
.post("/v0/crawl")
|
||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||
.set("Content-Type", "application/json")
|
||||
|
@ -334,7 +335,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
});
|
||||
|
||||
let isFinished = false;
|
||||
let response;
|
||||
let response: FirecrawlCrawlStatusResponse;
|
||||
|
||||
while (!isFinished) {
|
||||
response = await request(TEST_URL)
|
||||
|
@ -350,7 +351,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
}
|
||||
}
|
||||
|
||||
const completedResponse = response;
|
||||
const completedResponse: FirecrawlCrawlStatusResponse = response;
|
||||
|
||||
const urls = completedResponse.body.data.map(
|
||||
(item: any) => item.metadata?.sourceURL
|
||||
|
@ -362,7 +363,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
}, 90000); // 90 seconds
|
||||
|
||||
it.concurrent("should return a successful response with a valid API key and limit to 3", async () => {
|
||||
const crawlResponse = await request(TEST_URL)
|
||||
const crawlResponse: FirecrawlCrawlResponse = await request(TEST_URL)
|
||||
.post("/v0/crawl")
|
||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||
.set("Content-Type", "application/json")
|
||||
|
@ -372,7 +373,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
});
|
||||
|
||||
let isFinished = false;
|
||||
let response;
|
||||
let response: FirecrawlCrawlStatusResponse;
|
||||
|
||||
while (!isFinished) {
|
||||
response = await request(TEST_URL)
|
||||
|
@ -388,7 +389,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
}
|
||||
}
|
||||
|
||||
const completedResponse = response;
|
||||
const completedResponse: FirecrawlCrawlStatusResponse = response;
|
||||
|
||||
expect(completedResponse.statusCode).toBe(200);
|
||||
expect(completedResponse.body).toHaveProperty("status");
|
||||
|
@ -404,7 +405,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
}, 60000); // 60 seconds
|
||||
|
||||
it.concurrent("should return a successful response with max depth option for a valid crawl job", async () => {
|
||||
const crawlResponse = await request(TEST_URL)
|
||||
const crawlResponse: FirecrawlCrawlResponse = await request(TEST_URL)
|
||||
.post("/v0/crawl")
|
||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||
.set("Content-Type", "application/json")
|
||||
|
@ -414,7 +415,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
});
|
||||
expect(crawlResponse.statusCode).toBe(200);
|
||||
|
||||
const response = await request(TEST_URL)
|
||||
const response: FirecrawlCrawlStatusResponse = await request(TEST_URL)
|
||||
.get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
|
||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
@ -432,7 +433,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
|
||||
}
|
||||
}
|
||||
const completedResponse = await request(TEST_URL)
|
||||
const completedResponse: FirecrawlCrawlStatusResponse = await request(TEST_URL)
|
||||
.get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
|
||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
|
||||
|
||||
|
@ -461,12 +462,12 @@ describe("E2E Tests for API Routes", () => {
|
|||
|
||||
describe("POST /v0/crawlWebsitePreview", () => {
|
||||
it.concurrent("should require authorization", async () => {
|
||||
const response = await request(TEST_URL).post("/v0/crawlWebsitePreview");
|
||||
const response: FirecrawlCrawlResponse = await request(TEST_URL).post("/v0/crawlWebsitePreview");
|
||||
expect(response.statusCode).toBe(401);
|
||||
});
|
||||
|
||||
it.concurrent("should return an error response with an invalid API key", async () => {
|
||||
const response = await request(TEST_URL)
|
||||
const response: FirecrawlCrawlResponse = await request(TEST_URL)
|
||||
.post("/v0/crawlWebsitePreview")
|
||||
.set("Authorization", `Bearer invalid-api-key`)
|
||||
.set("Content-Type", "application/json")
|
||||
|
@ -475,7 +476,7 @@ describe("E2E Tests for API Routes", () => {
|
|||
});
|
||||
|
||||
it.concurrent("should return a timeout error when scraping takes longer than the specified timeout", async () => {
|
||||
const response = await request(TEST_URL)
|
||||
const response: FirecrawlCrawlResponse = await request(TEST_URL)
|
||||
.post("/v0/scrape")
|
||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||
.set("Content-Type", "application/json")
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
import { ExtractorOptions } from "./lib/entities";
|
||||
import { ExtractorOptions, Document } from "./lib/entities";
|
||||
|
||||
export interface CrawlResult {
|
||||
source: string;
|
||||
|
@ -43,6 +43,34 @@ export interface FirecrawlJob {
|
|||
num_tokens?: number,
|
||||
}
|
||||
|
||||
/**
 * Shape used to annotate the awaited response of a scrape request
 * (the E2E tests in this commit apply it to `request(TEST_URL).post("/v0/scrape")`).
 *
 * - `statusCode`: numeric status of the response; the tests compare it against
 *   values such as 401.
 * - `body.status`: status string carried in the response body.
 * - `body.data`: a single `Document` (imported from "./lib/entities"); the tests
 *   read `body.data.content` from it.
 * - `error`: optional string; nothing in view reads it — presumably an error
 *   message, TODO confirm.
 */
export interface FirecrawlScrapeResponse {
|
||||
statusCode: number;
|
||||
body: {
|
||||
status: string;
|
||||
data: Document;
|
||||
};
|
||||
error?: string;
|
||||
}
|
||||
|
||||
/**
 * Shape used to annotate the awaited response of a crawl submission
 * (the E2E tests in this commit apply it to `request(TEST_URL).post("/v0/crawl")`
 * and to `post("/v0/crawlWebsitePreview")`).
 *
 * - `statusCode`: numeric status of the response; the tests compare it against
 *   values such as 401 and 200.
 * - `body.status`: status string carried in the response body.
 * - `body.jobId`: job identifier; the tests interpolate it into
 *   `/v0/crawl/status/${jobId}` to poll the job.
 * - `error`: optional string; nothing in view reads it — presumably an error
 *   message, TODO confirm.
 *
 * NOTE(review): the timeout test in the same commit annotates a
 * `POST /v0/scrape` response with this type, while every other `/v0/scrape`
 * call uses `FirecrawlScrapeResponse` — looks like a copy/paste slip; confirm.
 */
export interface FirecrawlCrawlResponse {
|
||||
statusCode: number;
|
||||
body: {
|
||||
status: string;
|
||||
jobId: string;
|
||||

||||
};
|
||||
error?: string;
|
||||
}
|
||||
|
||||
/**
 * Shape used to annotate the awaited response of a crawl status lookup
 * (the E2E tests in this commit apply it to
 * `request(TEST_URL).get("/v0/crawl/status/<jobId>")`).
 *
 * - `statusCode`: numeric status of the response; the tests expect 200.
 * - `body.status`: status string carried in the response body; the tests assert
 *   the property is present.
 * - `body.data`: array of `Document` (imported from "./lib/entities"); the tests
 *   map over it and read `metadata?.sourceURL` from each item.
 * - `error`: optional string; nothing in view reads it — presumably an error
 *   message, TODO confirm.
 */
export interface FirecrawlCrawlStatusResponse {
|
||||
statusCode: number;
|
||||
body: {
|
||||
status: string;
|
||||
data: Document[];
|
||||
};
|
||||
error?: string;
|
||||
}
|
||||
|
||||
export enum RateLimiterMode {
|
||||
Crawl = "crawl",
|
||||
CrawlStatus = "crawlStatus",
|
||||
|
|
Loading…
Reference in New Issue
Block a user