feat(js-sdk): fixes, update tests

This commit is contained in:
Gergő Móricz 2024-09-11 20:15:34 +02:00
parent 5791d3a0e5
commit 6e1cf2f40d
3 changed files with 118 additions and 91 deletions

View File

@ -1,12 +1,12 @@
{
"name": "@mendable/firecrawl-js",
"version": "1.2.1",
"version": "1.2.3",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@mendable/firecrawl-js",
"version": "1.2.1",
"version": "1.2.3",
"license": "MIT",
"dependencies": {
"axios": "^1.6.8",

View File

@ -1,4 +1,4 @@
import FirecrawlApp, { CrawlParams, CrawlResponse, CrawlStatusResponse, MapResponse, ScrapeParams, ScrapeResponse } from '../../../index';
import FirecrawlApp, { type CrawlParams, type CrawlResponse, type CrawlStatusResponse, type MapResponse, type ScrapeResponse } from '../../../index';
import { v4 as uuidv4 } from 'uuid';
import dotenv from 'dotenv';
import { describe, test, expect } from '@jest/globals';
@ -6,7 +6,7 @@ import { describe, test, expect } from '@jest/globals';
dotenv.config();
const TEST_API_KEY = process.env.TEST_API_KEY;
const API_URL = "http://127.0.0.1:3002";
const API_URL = "https://api.firecrawl.dev";
describe('FirecrawlApp E2E Tests', () => {
test.concurrent('should throw error for no API key', async () => {
@ -71,6 +71,7 @@ describe('FirecrawlApp E2E Tests', () => {
expect(response.links?.length).toBeGreaterThan(0);
expect(response.links?.[0]).toContain("https://");
expect(response.metadata).not.toBeNull();
expect(response.metadata).not.toBeUndefined();
expect(response.metadata).toHaveProperty("title");
expect(response.metadata).toHaveProperty("description");
expect(response.metadata).toHaveProperty("keywords");
@ -85,19 +86,21 @@ describe('FirecrawlApp E2E Tests', () => {
expect(response.metadata).not.toHaveProperty("pageStatusCode");
expect(response.metadata).toHaveProperty("statusCode");
expect(response.metadata).not.toHaveProperty("pageError");
expect(response.metadata.error).toBeUndefined();
expect(response.metadata.title).toBe("Roast My Website");
expect(response.metadata.description).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️");
expect(response.metadata.keywords).toBe("Roast My Website,Roast,Website,GitHub,Firecrawl");
expect(response.metadata.robots).toBe("follow, index");
expect(response.metadata.ogTitle).toBe("Roast My Website");
expect(response.metadata.ogDescription).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️");
expect(response.metadata.ogUrl).toBe("https://www.roastmywebsite.ai");
expect(response.metadata.ogImage).toBe("https://www.roastmywebsite.ai/og.png");
expect(response.metadata.ogLocaleAlternate).toStrictEqual([]);
expect(response.metadata.ogSiteName).toBe("Roast My Website");
expect(response.metadata.sourceURL).toBe("https://roastmywebsite.ai");
expect(response.metadata.statusCode).toBe(200);
if (response.metadata !== undefined) {
expect(response.metadata.error).toBeUndefined();
expect(response.metadata.title).toBe("Roast My Website");
expect(response.metadata.description).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️");
expect(response.metadata.keywords).toBe("Roast My Website,Roast,Website,GitHub,Firecrawl");
expect(response.metadata.robots).toBe("follow, index");
expect(response.metadata.ogTitle).toBe("Roast My Website");
expect(response.metadata.ogDescription).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️");
expect(response.metadata.ogUrl).toBe("https://www.roastmywebsite.ai");
expect(response.metadata.ogImage).toBe("https://www.roastmywebsite.ai/og.png");
expect(response.metadata.ogLocaleAlternate).toStrictEqual([]);
expect(response.metadata.ogSiteName).toBe("Roast My Website");
expect(response.metadata.sourceURL).toBe("https://roastmywebsite.ai");
expect(response.metadata.statusCode).toBe(200);
}
}, 30000); // 30 seconds timeout
test.concurrent('should return successful response for valid scrape with PDF file', async () => {
@ -127,7 +130,7 @@ describe('FirecrawlApp E2E Tests', () => {
test.concurrent('should return successful response for crawl and wait for completion', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.crawlUrl('https://roastmywebsite.ai', {}, true, 30) as CrawlStatusResponse;
const response = await app.crawlUrl('https://roastmywebsite.ai', {}, 30) as CrawlStatusResponse;
expect(response).not.toBeNull();
expect(response).toHaveProperty("total");
expect(response.total).toBeGreaterThan(0);
@ -138,21 +141,25 @@ describe('FirecrawlApp E2E Tests', () => {
expect(response).toHaveProperty("status");
expect(response.status).toBe("completed");
expect(response).not.toHaveProperty("next"); // wait until done
expect(response.data?.length).toBeGreaterThan(0);
expect(response.data?.[0]).toHaveProperty("markdown");
expect(response.data?.[0].markdown).toContain("_Roast_");
expect(response.data?.[0]).not.toHaveProperty('content'); // v0
expect(response.data?.[0]).not.toHaveProperty("html");
expect(response.data?.[0]).not.toHaveProperty("rawHtml");
expect(response.data?.[0]).not.toHaveProperty("screenshot");
expect(response.data?.[0]).not.toHaveProperty("links");
expect(response.data?.[0]).toHaveProperty("metadata");
expect(response.data?.[0].metadata).toHaveProperty("title");
expect(response.data?.[0].metadata).toHaveProperty("description");
expect(response.data?.[0].metadata).toHaveProperty("language");
expect(response.data?.[0].metadata).toHaveProperty("sourceURL");
expect(response.data?.[0].metadata).toHaveProperty("statusCode");
expect(response.data?.[0].metadata).not.toHaveProperty("error");
expect(response.data.length).toBeGreaterThan(0);
expect(response.data[0]).not.toBeNull();
expect(response.data[0]).not.toBeUndefined();
if (response.data[0]) {
expect(response.data[0]).toHaveProperty("markdown");
expect(response.data[0].markdown).toContain("_Roast_");
expect(response.data[0]).not.toHaveProperty('content'); // v0
expect(response.data[0]).not.toHaveProperty("html");
expect(response.data[0]).not.toHaveProperty("rawHtml");
expect(response.data[0]).not.toHaveProperty("screenshot");
expect(response.data[0]).not.toHaveProperty("links");
expect(response.data[0]).toHaveProperty("metadata");
expect(response.data[0].metadata).toHaveProperty("title");
expect(response.data[0].metadata).toHaveProperty("description");
expect(response.data[0].metadata).toHaveProperty("language");
expect(response.data[0].metadata).toHaveProperty("sourceURL");
expect(response.data[0].metadata).toHaveProperty("statusCode");
expect(response.data[0].metadata).not.toHaveProperty("error");
}
}, 60000); // 60 seconds timeout
test.concurrent('should return successful response for crawl with options and wait for completion', async () => {
@ -173,7 +180,7 @@ describe('FirecrawlApp E2E Tests', () => {
onlyMainContent: true,
waitFor: 1000
}
} as CrawlParams, true, 30) as CrawlStatusResponse;
} as CrawlParams, 30) as CrawlStatusResponse;
expect(response).not.toBeNull();
expect(response).toHaveProperty("total");
expect(response.total).toBeGreaterThan(0);
@ -184,41 +191,45 @@ describe('FirecrawlApp E2E Tests', () => {
expect(response).toHaveProperty("status");
expect(response.status).toBe("completed");
expect(response).not.toHaveProperty("next");
expect(response.data?.length).toBeGreaterThan(0);
expect(response.data?.[0]).toHaveProperty("markdown");
expect(response.data?.[0].markdown).toContain("_Roast_");
expect(response.data?.[0]).not.toHaveProperty('content'); // v0
expect(response.data?.[0]).toHaveProperty("html");
expect(response.data?.[0].html).toContain("<h1");
expect(response.data?.[0]).toHaveProperty("rawHtml");
expect(response.data?.[0].rawHtml).toContain("<h1");
expect(response.data?.[0]).toHaveProperty("screenshot");
expect(response.data?.[0].screenshot).toContain("https://");
expect(response.data?.[0]).toHaveProperty("links");
expect(response.data?.[0].links).not.toBeNull();
expect(response.data?.[0].links?.length).toBeGreaterThan(0);
expect(response.data?.[0]).toHaveProperty("metadata");
expect(response.data?.[0].metadata).toHaveProperty("title");
expect(response.data?.[0].metadata).toHaveProperty("description");
expect(response.data?.[0].metadata).toHaveProperty("language");
expect(response.data?.[0].metadata).toHaveProperty("sourceURL");
expect(response.data?.[0].metadata).toHaveProperty("statusCode");
expect(response.data?.[0].metadata).not.toHaveProperty("error");
expect(response.data.length).toBeGreaterThan(0);
expect(response.data[0]).not.toBeNull();
expect(response.data[0]).not.toBeUndefined();
if (response.data[0]) {
expect(response.data[0]).toHaveProperty("markdown");
expect(response.data[0].markdown).toContain("_Roast_");
expect(response.data[0]).not.toHaveProperty('content'); // v0
expect(response.data[0]).toHaveProperty("html");
expect(response.data[0].html).toContain("<h1");
expect(response.data[0]).toHaveProperty("rawHtml");
expect(response.data[0].rawHtml).toContain("<h1");
expect(response.data[0]).toHaveProperty("screenshot");
expect(response.data[0].screenshot).toContain("https://");
expect(response.data[0]).toHaveProperty("links");
expect(response.data[0].links).not.toBeNull();
expect(response.data[0].links?.length).toBeGreaterThan(0);
expect(response.data[0]).toHaveProperty("metadata");
expect(response.data[0].metadata).toHaveProperty("title");
expect(response.data[0].metadata).toHaveProperty("description");
expect(response.data[0].metadata).toHaveProperty("language");
expect(response.data[0].metadata).toHaveProperty("sourceURL");
expect(response.data[0].metadata).toHaveProperty("statusCode");
expect(response.data[0].metadata).not.toHaveProperty("error");
}
}, 60000); // 60 seconds timeout
test.concurrent('should handle idempotency key for crawl', async () => {
  const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
  // A fresh UUID per run so this test never collides with a key used earlier.
  const uniqueIdempotencyKey = uuidv4();

  // First submission under the fresh key is expected to be accepted and to
  // yield a crawl id.
  const response = await app.asyncCrawlUrl('https://roastmywebsite.ai', {}, uniqueIdempotencyKey) as CrawlResponse;
  expect(response).not.toBeNull();
  expect(response.id).toBeDefined();

  // Submitting again with the same key must be rejected with HTTP 409.
  await expect(app.crawlUrl('https://roastmywebsite.ai', {}, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
});
test.concurrent('should check crawl status', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.crawlUrl('https://firecrawl.dev', { scrapeOptions: { formats: ['markdown', 'html', 'rawHtml', 'screenshot', 'links']}} as CrawlParams, false) as CrawlResponse;
const response = await app.asyncCrawlUrl('https://firecrawl.dev', { scrapeOptions: { formats: ['markdown', 'html', 'rawHtml', 'screenshot', 'links']}} as CrawlParams) as CrawlResponse;
expect(response).not.toBeNull();
expect(response.id).toBeDefined();
@ -226,7 +237,8 @@ describe('FirecrawlApp E2E Tests', () => {
const maxChecks = 15;
let checks = 0;
while (statusResponse.status === 'scraping' && checks < maxChecks) {
expect(statusResponse.success).toBe(true);
while ((statusResponse as any).status === 'scraping' && checks < maxChecks) {
await new Promise(resolve => setTimeout(resolve, 5000));
expect(statusResponse).not.toHaveProperty("partial_data"); // v0
expect(statusResponse).not.toHaveProperty("current"); // v0
@ -236,44 +248,55 @@ describe('FirecrawlApp E2E Tests', () => {
expect(statusResponse).toHaveProperty("expiresAt");
expect(statusResponse).toHaveProperty("status");
expect(statusResponse).toHaveProperty("next");
expect(statusResponse.total).toBeGreaterThan(0);
expect(statusResponse.creditsUsed).toBeGreaterThan(0);
expect(statusResponse.expiresAt.getTime()).toBeGreaterThan(Date.now());
expect(statusResponse.status).toBe("scraping");
expect(statusResponse.next).toContain("/v1/crawl/");
expect(statusResponse.success).toBe(true);
if (statusResponse.success === true) {
expect(statusResponse.total).toBeGreaterThan(0);
expect(statusResponse.creditsUsed).toBeGreaterThan(0);
expect(statusResponse.expiresAt.getTime()).toBeGreaterThan(Date.now());
expect(statusResponse.status).toBe("scraping");
expect(statusResponse.next).toContain("/v1/crawl/");
}
statusResponse = await app.checkCrawlStatus(response.id) as CrawlStatusResponse;
expect(statusResponse.success).toBe(true);
checks++;
}
expect(statusResponse).not.toBeNull();
expect(statusResponse).toHaveProperty("total");
expect(statusResponse.total).toBeGreaterThan(0);
expect(statusResponse).toHaveProperty("creditsUsed");
expect(statusResponse.creditsUsed).toBeGreaterThan(0);
expect(statusResponse).toHaveProperty("expiresAt");
expect(statusResponse.expiresAt.getTime()).toBeGreaterThan(Date.now());
expect(statusResponse).toHaveProperty("status");
expect(statusResponse.status).toBe("completed");
expect(statusResponse.data?.length).toBeGreaterThan(0);
expect(statusResponse.data?.[0]).toHaveProperty("markdown");
expect(statusResponse.data?.[0].markdown?.length).toBeGreaterThan(10);
expect(statusResponse.data?.[0]).not.toHaveProperty('content'); // v0
expect(statusResponse.data?.[0]).toHaveProperty("html");
expect(statusResponse.data?.[0].html).toContain("<div");
expect(statusResponse.data?.[0]).toHaveProperty("rawHtml");
expect(statusResponse.data?.[0].rawHtml).toContain("<div");
expect(statusResponse.data?.[0]).toHaveProperty("screenshot");
expect(statusResponse.data?.[0].screenshot).toContain("https://");
expect(statusResponse.data?.[0]).toHaveProperty("links");
expect(statusResponse.data?.[0].links).not.toBeNull();
expect(statusResponse.data?.[0].links?.length).toBeGreaterThan(0);
expect(statusResponse.data?.[0]).toHaveProperty("metadata");
expect(statusResponse.data?.[0].metadata).toHaveProperty("title");
expect(statusResponse.data?.[0].metadata).toHaveProperty("description");
expect(statusResponse.data?.[0].metadata).toHaveProperty("language");
expect(statusResponse.data?.[0].metadata).toHaveProperty("sourceURL");
expect(statusResponse.data?.[0].metadata).toHaveProperty("statusCode");
expect(statusResponse.data?.[0].metadata).not.toHaveProperty("error");
expect(statusResponse.success).toBe(true);
if (statusResponse.success === true) {
expect(statusResponse.total).toBeGreaterThan(0);
expect(statusResponse).toHaveProperty("creditsUsed");
expect(statusResponse.creditsUsed).toBeGreaterThan(0);
expect(statusResponse).toHaveProperty("expiresAt");
expect(statusResponse.expiresAt.getTime()).toBeGreaterThan(Date.now());
expect(statusResponse).toHaveProperty("status");
expect(statusResponse.status).toBe("completed");
expect(statusResponse.data.length).toBeGreaterThan(0);
expect(statusResponse.data[0]).not.toBeNull();
expect(statusResponse.data[0]).not.toBeUndefined();
if (statusResponse.data[0]) {
expect(statusResponse.data[0]).toHaveProperty("markdown");
expect(statusResponse.data[0].markdown?.length).toBeGreaterThan(10);
expect(statusResponse.data[0]).not.toHaveProperty('content'); // v0
expect(statusResponse.data[0]).toHaveProperty("html");
expect(statusResponse.data[0].html).toContain("<div");
expect(statusResponse.data[0]).toHaveProperty("rawHtml");
expect(statusResponse.data[0].rawHtml).toContain("<div");
expect(statusResponse.data[0]).toHaveProperty("screenshot");
expect(statusResponse.data[0].screenshot).toContain("https://");
expect(statusResponse.data[0]).toHaveProperty("links");
expect(statusResponse.data[0].links).not.toBeNull();
expect(statusResponse.data[0].links?.length).toBeGreaterThan(0);
expect(statusResponse.data[0]).toHaveProperty("metadata");
expect(statusResponse.data[0].metadata).toHaveProperty("title");
expect(statusResponse.data[0].metadata).toHaveProperty("description");
expect(statusResponse.data[0].metadata).toHaveProperty("language");
expect(statusResponse.data[0].metadata).toHaveProperty("sourceURL");
expect(statusResponse.data[0].metadata).toHaveProperty("statusCode");
expect(statusResponse.data[0].metadata).not.toHaveProperty("error");
}
}
}, 60000); // 60 seconds timeout
test.concurrent('should throw error for invalid API key on map', async () => {

View File

@ -183,7 +183,11 @@ export default class FirecrawlApp {
* @param config - Configuration options for the FirecrawlApp instance.
*/
constructor({ apiKey = null, apiUrl = null }: FirecrawlAppConfig) {
  // Fail fast when the key is missing: an omitted key defaults to null, and
  // any other non-string value is rejected the same way. An empty string
  // still passes this check.
  if (typeof apiKey !== "string") {
    throw new Error("No API key provided");
  }
  this.apiKey = apiKey;
  // A null or empty URL falls back to the hosted API endpoint.
  this.apiUrl = apiUrl || "https://api.firecrawl.dev";
}