Mirror of https://github.com/mendableai/firecrawl.git (synced 2024-11-16 11:42:24 +08:00)
feat(js-sdk): fixes, update tests
This commit is contained in:
parent 5791d3a0e5
commit 6e1cf2f40d
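
The diffs below bump the package version, rework the v1 E2E tests around the current crawl surface of the JS SDK (crawlUrl now polls to completion without the old boolean wait flag, asyncCrawlUrl starts a job and returns an id, checkCrawlStatus polls that id), and make the FirecrawlApp constructor reject a missing API key. A minimal usage sketch based on the calls these tests make, not taken verbatim from the diff; the numeric argument to crawlUrl is assumed to be a poll interval in seconds, and the API key value is a placeholder:

import FirecrawlApp, { type CrawlParams, type CrawlResponse, type CrawlStatusResponse } from '@mendable/firecrawl-js';

async function runCrawls() {
  // The tests construct the client with an explicit apiUrl pointing at the hosted API.
  const app = new FirecrawlApp({ apiKey: "fc-YOUR-API-KEY", apiUrl: "https://api.firecrawl.dev" });

  // Blocking call: crawlUrl polls until the crawl finishes (30 is assumed to be a poll interval in seconds).
  const completed = await app.crawlUrl('https://roastmywebsite.ai', {} as CrawlParams, 30) as CrawlStatusResponse;
  console.log(completed.status); // "completed" once the crawl is done

  // Non-blocking call: asyncCrawlUrl returns an id that checkCrawlStatus can poll.
  const started = await app.asyncCrawlUrl('https://firecrawl.dev', {} as CrawlParams) as CrawlResponse;
  if (started.id) {
    const status = await app.checkCrawlStatus(started.id) as CrawlStatusResponse;
    console.log(status.status); // "scraping" while the crawl is still running
  }
}

runCrawls().catch(console.error);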
apps/js-sdk/firecrawl/package-lock.json (generated, 4 lines changed)
@@ -1,12 +1,12 @@
 {
   "name": "@mendable/firecrawl-js",
-  "version": "1.2.1",
+  "version": "1.2.3",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "@mendable/firecrawl-js",
-      "version": "1.2.1",
+      "version": "1.2.3",
       "license": "MIT",
       "dependencies": {
         "axios": "^1.6.8",
@@ -1,4 +1,4 @@
-import FirecrawlApp, { CrawlParams, CrawlResponse, CrawlStatusResponse, MapResponse, ScrapeParams, ScrapeResponse } from '../../../index';
+import FirecrawlApp, { type CrawlParams, type CrawlResponse, type CrawlStatusResponse, type MapResponse, type ScrapeResponse } from '../../../index';
 import { v4 as uuidv4 } from 'uuid';
 import dotenv from 'dotenv';
 import { describe, test, expect } from '@jest/globals';
@@ -6,7 +6,7 @@ import { describe, test, expect } from '@jest/globals';
 dotenv.config();
 
 const TEST_API_KEY = process.env.TEST_API_KEY;
-const API_URL = "http://127.0.0.1:3002";
+const API_URL = "https://api.firecrawl.dev";
 
 describe('FirecrawlApp E2E Tests', () => {
   test.concurrent('should throw error for no API key', async () => {
@@ -71,6 +71,7 @@ describe('FirecrawlApp E2E Tests', () => {
     expect(response.links?.length).toBeGreaterThan(0);
+    expect(response.links?.[0]).toContain("https://");
     expect(response.metadata).not.toBeNull();
     expect(response.metadata).not.toBeUndefined();
     expect(response.metadata).toHaveProperty("title");
     expect(response.metadata).toHaveProperty("description");
     expect(response.metadata).toHaveProperty("keywords");
@@ -85,19 +86,21 @@ describe('FirecrawlApp E2E Tests', () => {
     expect(response.metadata).not.toHaveProperty("pageStatusCode");
     expect(response.metadata).toHaveProperty("statusCode");
     expect(response.metadata).not.toHaveProperty("pageError");
-    expect(response.metadata.error).toBeUndefined();
-    expect(response.metadata.title).toBe("Roast My Website");
-    expect(response.metadata.description).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️");
-    expect(response.metadata.keywords).toBe("Roast My Website,Roast,Website,GitHub,Firecrawl");
-    expect(response.metadata.robots).toBe("follow, index");
-    expect(response.metadata.ogTitle).toBe("Roast My Website");
-    expect(response.metadata.ogDescription).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️");
-    expect(response.metadata.ogUrl).toBe("https://www.roastmywebsite.ai");
-    expect(response.metadata.ogImage).toBe("https://www.roastmywebsite.ai/og.png");
-    expect(response.metadata.ogLocaleAlternate).toStrictEqual([]);
-    expect(response.metadata.ogSiteName).toBe("Roast My Website");
-    expect(response.metadata.sourceURL).toBe("https://roastmywebsite.ai");
-    expect(response.metadata.statusCode).toBe(200);
+    if (response.metadata !== undefined) {
+      expect(response.metadata.error).toBeUndefined();
+      expect(response.metadata.title).toBe("Roast My Website");
+      expect(response.metadata.description).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️");
+      expect(response.metadata.keywords).toBe("Roast My Website,Roast,Website,GitHub,Firecrawl");
+      expect(response.metadata.robots).toBe("follow, index");
+      expect(response.metadata.ogTitle).toBe("Roast My Website");
+      expect(response.metadata.ogDescription).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️");
+      expect(response.metadata.ogUrl).toBe("https://www.roastmywebsite.ai");
+      expect(response.metadata.ogImage).toBe("https://www.roastmywebsite.ai/og.png");
+      expect(response.metadata.ogLocaleAlternate).toStrictEqual([]);
+      expect(response.metadata.ogSiteName).toBe("Roast My Website");
+      expect(response.metadata.sourceURL).toBe("https://roastmywebsite.ai");
+      expect(response.metadata.statusCode).toBe(200);
+    }
   }, 30000); // 30 seconds timeout
 
   test.concurrent('should return successful response for valid scrape with PDF file', async () => {
@@ -127,7 +130,7 @@ describe('FirecrawlApp E2E Tests', () => {
 
   test.concurrent('should return successful response for crawl and wait for completion', async () => {
     const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
-    const response = await app.crawlUrl('https://roastmywebsite.ai', {}, true, 30) as CrawlStatusResponse;
+    const response = await app.crawlUrl('https://roastmywebsite.ai', {}, 30) as CrawlStatusResponse;
     expect(response).not.toBeNull();
     expect(response).toHaveProperty("total");
     expect(response.total).toBeGreaterThan(0);
@@ -138,21 +141,25 @@ describe('FirecrawlApp E2E Tests', () => {
     expect(response).toHaveProperty("status");
     expect(response.status).toBe("completed");
     expect(response).not.toHaveProperty("next"); // wait until done
-    expect(response.data?.length).toBeGreaterThan(0);
-    expect(response.data?.[0]).toHaveProperty("markdown");
-    expect(response.data?.[0].markdown).toContain("_Roast_");
-    expect(response.data?.[0]).not.toHaveProperty('content'); // v0
-    expect(response.data?.[0]).not.toHaveProperty("html");
-    expect(response.data?.[0]).not.toHaveProperty("rawHtml");
-    expect(response.data?.[0]).not.toHaveProperty("screenshot");
-    expect(response.data?.[0]).not.toHaveProperty("links");
-    expect(response.data?.[0]).toHaveProperty("metadata");
-    expect(response.data?.[0].metadata).toHaveProperty("title");
-    expect(response.data?.[0].metadata).toHaveProperty("description");
-    expect(response.data?.[0].metadata).toHaveProperty("language");
-    expect(response.data?.[0].metadata).toHaveProperty("sourceURL");
-    expect(response.data?.[0].metadata).toHaveProperty("statusCode");
-    expect(response.data?.[0].metadata).not.toHaveProperty("error");
+    expect(response.data.length).toBeGreaterThan(0);
+    expect(response.data[0]).not.toBeNull();
+    expect(response.data[0]).not.toBeUndefined();
+    if (response.data[0]) {
+      expect(response.data[0]).toHaveProperty("markdown");
+      expect(response.data[0].markdown).toContain("_Roast_");
+      expect(response.data[0]).not.toHaveProperty('content'); // v0
+      expect(response.data[0]).not.toHaveProperty("html");
+      expect(response.data[0]).not.toHaveProperty("rawHtml");
+      expect(response.data[0]).not.toHaveProperty("screenshot");
+      expect(response.data[0]).not.toHaveProperty("links");
+      expect(response.data[0]).toHaveProperty("metadata");
+      expect(response.data[0].metadata).toHaveProperty("title");
+      expect(response.data[0].metadata).toHaveProperty("description");
+      expect(response.data[0].metadata).toHaveProperty("language");
+      expect(response.data[0].metadata).toHaveProperty("sourceURL");
+      expect(response.data[0].metadata).toHaveProperty("statusCode");
+      expect(response.data[0].metadata).not.toHaveProperty("error");
+    }
   }, 60000); // 60 seconds timeout
 
   test.concurrent('should return successful response for crawl with options and wait for completion', async () => {
@@ -173,7 +180,7 @@ describe('FirecrawlApp E2E Tests', () => {
         onlyMainContent: true,
         waitFor: 1000
       }
-    } as CrawlParams, true, 30) as CrawlStatusResponse;
+    } as CrawlParams, 30) as CrawlStatusResponse;
     expect(response).not.toBeNull();
     expect(response).toHaveProperty("total");
     expect(response.total).toBeGreaterThan(0);
@@ -184,41 +191,45 @@ describe('FirecrawlApp E2E Tests', () => {
     expect(response).toHaveProperty("status");
     expect(response.status).toBe("completed");
     expect(response).not.toHaveProperty("next");
-    expect(response.data?.length).toBeGreaterThan(0);
-    expect(response.data?.[0]).toHaveProperty("markdown");
-    expect(response.data?.[0].markdown).toContain("_Roast_");
-    expect(response.data?.[0]).not.toHaveProperty('content'); // v0
-    expect(response.data?.[0]).toHaveProperty("html");
-    expect(response.data?.[0].html).toContain("<h1");
-    expect(response.data?.[0]).toHaveProperty("rawHtml");
-    expect(response.data?.[0].rawHtml).toContain("<h1");
-    expect(response.data?.[0]).toHaveProperty("screenshot");
-    expect(response.data?.[0].screenshot).toContain("https://");
-    expect(response.data?.[0]).toHaveProperty("links");
-    expect(response.data?.[0].links).not.toBeNull();
-    expect(response.data?.[0].links?.length).toBeGreaterThan(0);
-    expect(response.data?.[0]).toHaveProperty("metadata");
-    expect(response.data?.[0].metadata).toHaveProperty("title");
-    expect(response.data?.[0].metadata).toHaveProperty("description");
-    expect(response.data?.[0].metadata).toHaveProperty("language");
-    expect(response.data?.[0].metadata).toHaveProperty("sourceURL");
-    expect(response.data?.[0].metadata).toHaveProperty("statusCode");
-    expect(response.data?.[0].metadata).not.toHaveProperty("error");
+    expect(response.data.length).toBeGreaterThan(0);
+    expect(response.data[0]).not.toBeNull();
+    expect(response.data[0]).not.toBeUndefined();
+    if (response.data[0]) {
+      expect(response.data[0]).toHaveProperty("markdown");
+      expect(response.data[0].markdown).toContain("_Roast_");
+      expect(response.data[0]).not.toHaveProperty('content'); // v0
+      expect(response.data[0]).toHaveProperty("html");
+      expect(response.data[0].html).toContain("<h1");
+      expect(response.data[0]).toHaveProperty("rawHtml");
+      expect(response.data[0].rawHtml).toContain("<h1");
+      expect(response.data[0]).toHaveProperty("screenshot");
+      expect(response.data[0].screenshot).toContain("https://");
+      expect(response.data[0]).toHaveProperty("links");
+      expect(response.data[0].links).not.toBeNull();
+      expect(response.data[0].links?.length).toBeGreaterThan(0);
+      expect(response.data[0]).toHaveProperty("metadata");
+      expect(response.data[0].metadata).toHaveProperty("title");
+      expect(response.data[0].metadata).toHaveProperty("description");
+      expect(response.data[0].metadata).toHaveProperty("language");
+      expect(response.data[0].metadata).toHaveProperty("sourceURL");
+      expect(response.data[0].metadata).toHaveProperty("statusCode");
+      expect(response.data[0].metadata).not.toHaveProperty("error");
+    }
   }, 60000); // 60 seconds timeout
 
   test.concurrent('should handle idempotency key for crawl', async () => {
     const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
     const uniqueIdempotencyKey = uuidv4();
-    const response = await app.crawlUrl('https://roastmywebsite.ai', {}, false, 2, uniqueIdempotencyKey) as CrawlResponse;
+    const response = await app.asyncCrawlUrl('https://roastmywebsite.ai', {}, uniqueIdempotencyKey) as CrawlResponse;
     expect(response).not.toBeNull();
     expect(response.id).toBeDefined();
 
-    await expect(app.crawlUrl('https://roastmywebsite.ai', {}, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
+    await expect(app.crawlUrl('https://roastmywebsite.ai', {}, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
   });
 
   test.concurrent('should check crawl status', async () => {
     const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
-    const response = await app.crawlUrl('https://firecrawl.dev', { scrapeOptions: { formats: ['markdown', 'html', 'rawHtml', 'screenshot', 'links']}} as CrawlParams, false) as CrawlResponse;
+    const response = await app.asyncCrawlUrl('https://firecrawl.dev', { scrapeOptions: { formats: ['markdown', 'html', 'rawHtml', 'screenshot', 'links']}} as CrawlParams) as CrawlResponse;
     expect(response).not.toBeNull();
     expect(response.id).toBeDefined();
 
@@ -226,7 +237,8 @@ describe('FirecrawlApp E2E Tests', () => {
     const maxChecks = 15;
     let checks = 0;
 
-    while (statusResponse.status === 'scraping' && checks < maxChecks) {
+    expect(statusResponse.success).toBe(true);
+    while ((statusResponse as any).status === 'scraping' && checks < maxChecks) {
       await new Promise(resolve => setTimeout(resolve, 5000));
       expect(statusResponse).not.toHaveProperty("partial_data"); // v0
       expect(statusResponse).not.toHaveProperty("current"); // v0
@@ -236,44 +248,55 @@ describe('FirecrawlApp E2E Tests', () => {
       expect(statusResponse).toHaveProperty("expiresAt");
       expect(statusResponse).toHaveProperty("status");
       expect(statusResponse).toHaveProperty("next");
-      expect(statusResponse.total).toBeGreaterThan(0);
-      expect(statusResponse.creditsUsed).toBeGreaterThan(0);
-      expect(statusResponse.expiresAt.getTime()).toBeGreaterThan(Date.now());
-      expect(statusResponse.status).toBe("scraping");
-      expect(statusResponse.next).toContain("/v1/crawl/");
+      expect(statusResponse.success).toBe(true);
+      if (statusResponse.success === true) {
+        expect(statusResponse.total).toBeGreaterThan(0);
+        expect(statusResponse.creditsUsed).toBeGreaterThan(0);
+        expect(statusResponse.expiresAt.getTime()).toBeGreaterThan(Date.now());
+        expect(statusResponse.status).toBe("scraping");
+        expect(statusResponse.next).toContain("/v1/crawl/");
+      }
       statusResponse = await app.checkCrawlStatus(response.id) as CrawlStatusResponse;
+      expect(statusResponse.success).toBe(true);
       checks++;
     }
 
     expect(statusResponse).not.toBeNull();
     expect(statusResponse).toHaveProperty("total");
-    expect(statusResponse.total).toBeGreaterThan(0);
-    expect(statusResponse).toHaveProperty("creditsUsed");
-    expect(statusResponse.creditsUsed).toBeGreaterThan(0);
-    expect(statusResponse).toHaveProperty("expiresAt");
-    expect(statusResponse.expiresAt.getTime()).toBeGreaterThan(Date.now());
-    expect(statusResponse).toHaveProperty("status");
-    expect(statusResponse.status).toBe("completed");
-    expect(statusResponse.data?.length).toBeGreaterThan(0);
-    expect(statusResponse.data?.[0]).toHaveProperty("markdown");
-    expect(statusResponse.data?.[0].markdown?.length).toBeGreaterThan(10);
-    expect(statusResponse.data?.[0]).not.toHaveProperty('content'); // v0
-    expect(statusResponse.data?.[0]).toHaveProperty("html");
-    expect(statusResponse.data?.[0].html).toContain("<div");
-    expect(statusResponse.data?.[0]).toHaveProperty("rawHtml");
-    expect(statusResponse.data?.[0].rawHtml).toContain("<div");
-    expect(statusResponse.data?.[0]).toHaveProperty("screenshot");
-    expect(statusResponse.data?.[0].screenshot).toContain("https://");
-    expect(statusResponse.data?.[0]).toHaveProperty("links");
-    expect(statusResponse.data?.[0].links).not.toBeNull();
-    expect(statusResponse.data?.[0].links?.length).toBeGreaterThan(0);
-    expect(statusResponse.data?.[0]).toHaveProperty("metadata");
-    expect(statusResponse.data?.[0].metadata).toHaveProperty("title");
-    expect(statusResponse.data?.[0].metadata).toHaveProperty("description");
-    expect(statusResponse.data?.[0].metadata).toHaveProperty("language");
-    expect(statusResponse.data?.[0].metadata).toHaveProperty("sourceURL");
-    expect(statusResponse.data?.[0].metadata).toHaveProperty("statusCode");
-    expect(statusResponse.data?.[0].metadata).not.toHaveProperty("error");
+    expect(statusResponse.success).toBe(true);
+    if (statusResponse.success === true) {
+      expect(statusResponse.total).toBeGreaterThan(0);
+      expect(statusResponse).toHaveProperty("creditsUsed");
+      expect(statusResponse.creditsUsed).toBeGreaterThan(0);
+      expect(statusResponse).toHaveProperty("expiresAt");
+      expect(statusResponse.expiresAt.getTime()).toBeGreaterThan(Date.now());
+      expect(statusResponse).toHaveProperty("status");
+      expect(statusResponse.status).toBe("completed");
+      expect(statusResponse.data.length).toBeGreaterThan(0);
+      expect(statusResponse.data[0]).not.toBeNull();
+      expect(statusResponse.data[0]).not.toBeUndefined();
+      if (statusResponse.data[0]) {
+        expect(statusResponse.data[0]).toHaveProperty("markdown");
+        expect(statusResponse.data[0].markdown?.length).toBeGreaterThan(10);
+        expect(statusResponse.data[0]).not.toHaveProperty('content'); // v0
+        expect(statusResponse.data[0]).toHaveProperty("html");
+        expect(statusResponse.data[0].html).toContain("<div");
+        expect(statusResponse.data[0]).toHaveProperty("rawHtml");
+        expect(statusResponse.data[0].rawHtml).toContain("<div");
+        expect(statusResponse.data[0]).toHaveProperty("screenshot");
+        expect(statusResponse.data[0].screenshot).toContain("https://");
+        expect(statusResponse.data[0]).toHaveProperty("links");
+        expect(statusResponse.data[0].links).not.toBeNull();
+        expect(statusResponse.data[0].links?.length).toBeGreaterThan(0);
+        expect(statusResponse.data[0]).toHaveProperty("metadata");
+        expect(statusResponse.data[0].metadata).toHaveProperty("title");
+        expect(statusResponse.data[0].metadata).toHaveProperty("description");
+        expect(statusResponse.data[0].metadata).toHaveProperty("language");
+        expect(statusResponse.data[0].metadata).toHaveProperty("sourceURL");
+        expect(statusResponse.data[0].metadata).toHaveProperty("statusCode");
+        expect(statusResponse.data[0].metadata).not.toHaveProperty("error");
+      }
+    }
   }, 60000); // 60 seconds timeout
 
   test.concurrent('should throw error for invalid API key on map', async () => {
@@ -183,7 +183,11 @@ export default class FirecrawlApp
    * @param config - Configuration options for the FirecrawlApp instance.
    */
   constructor({ apiKey = null, apiUrl = null }: FirecrawlAppConfig) {
-    this.apiKey = apiKey || "";
+    if (typeof apiKey !== "string") {
+      throw new Error("No API key provided");
+    }
+
+    this.apiKey = apiKey;
     this.apiUrl = apiUrl || "https://api.firecrawl.dev";
   }
 
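
A small sketch of the constructor change above: with no key supplied, apiKey stays null, fails the typeof check, and the constructor now throws "No API key provided" instead of silently storing an empty string; apiUrl still falls back to the hosted endpoint. The key value below is a placeholder.

import FirecrawlApp from '@mendable/firecrawl-js';

// Missing key: apiKey defaults to null, which is not a string, so the constructor throws.
try {
  new FirecrawlApp({});
} catch (err) {
  console.error((err as Error).message); // "No API key provided"
}

// Valid key: stored as-is; apiUrl falls back to "https://api.firecrawl.dev".
const app = new FirecrawlApp({ apiKey: "fc-YOUR-API-KEY" });
console.log(app instanceof FirecrawlApp); // true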