From fe8f9d4b2ff2cb822be1a34eabd2125d4fc5db5c Mon Sep 17 00:00:00 2001 From: Andrei Bobkov Date: Tue, 3 Sep 2024 10:50:52 +0200 Subject: [PATCH] feat(js-sdk): drop `commonjs` outputs and simplify build process --- apps/js-sdk/firecrawl/build/cjs/index.js | 347 ------------------- apps/js-sdk/firecrawl/build/cjs/package.json | 1 - apps/js-sdk/firecrawl/build/esm/index.js | 339 ------------------ apps/js-sdk/firecrawl/build/esm/package.json | 1 - apps/js-sdk/firecrawl/package.json | 16 +- apps/js-sdk/firecrawl/src/index.ts | 2 +- apps/js-sdk/firecrawl/types/index.d.ts | 260 -------------- 7 files changed, 4 insertions(+), 962 deletions(-) delete mode 100644 apps/js-sdk/firecrawl/build/cjs/index.js delete mode 100644 apps/js-sdk/firecrawl/build/cjs/package.json delete mode 100644 apps/js-sdk/firecrawl/build/esm/index.js delete mode 100644 apps/js-sdk/firecrawl/build/esm/package.json delete mode 100644 apps/js-sdk/firecrawl/types/index.d.ts diff --git a/apps/js-sdk/firecrawl/build/cjs/index.js b/apps/js-sdk/firecrawl/build/cjs/index.js deleted file mode 100644 index 2908b09d..00000000 --- a/apps/js-sdk/firecrawl/build/cjs/index.js +++ /dev/null @@ -1,347 +0,0 @@ -"use strict"; -var __importDefault = (this && this.__importDefault) || function (mod) { - return (mod && mod.__esModule) ? mod : { "default": mod }; -}; -Object.defineProperty(exports, "__esModule", { value: true }); -exports.CrawlWatcher = void 0; -const axios_1 = __importDefault(require("axios")); -const zod_to_json_schema_1 = require("zod-to-json-schema"); -const isows_1 = require("isows"); -const typescript_event_target_1 = require("typescript-event-target"); -/** - * Main class for interacting with the Firecrawl API. - * Provides methods for scraping, searching, crawling, and mapping web content. - */ -class FirecrawlApp { - /** - * Initializes a new instance of the FirecrawlApp class. - * @param config - Configuration options for the FirecrawlApp instance. - */ - constructor({ apiKey = null, apiUrl = null }) { - this.apiKey = apiKey || ""; - this.apiUrl = apiUrl || "https://api.firecrawl.dev"; - } - /** - * Scrapes a URL using the Firecrawl API. - * @param url - The URL to scrape. - * @param params - Additional parameters for the scrape request. - * @returns The response from the scrape operation. - */ - async scrapeUrl(url, params) { - const headers = { - "Content-Type": "application/json", - Authorization: `Bearer ${this.apiKey}`, - }; - let jsonData = { url, ...params }; - if (jsonData?.extract?.schema) { - let schema = jsonData.extract.schema; - // Try parsing the schema as a Zod schema - try { - schema = (0, zod_to_json_schema_1.zodToJsonSchema)(schema); - } - catch (error) { - } - jsonData = { - ...jsonData, - extract: { - ...jsonData.extract, - schema: schema, - }, - }; - } - try { - const response = await axios_1.default.post(this.apiUrl + `/v1/scrape`, jsonData, { headers }); - if (response.status === 200) { - const responseData = response.data; - if (responseData.success) { - return { - success: true, - warning: responseData.warning, - error: responseData.error, - ...responseData.data - }; - } - else { - throw new Error(`Failed to scrape URL. Error: ${responseData.error}`); - } - } - else { - this.handleError(response, "scrape URL"); - } - } - catch (error) { - throw new Error(error.message); - } - return { success: false, error: "Internal server error." }; - } - /** - * This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API. - * @param query - The search query string. - * @param params - Additional parameters for the search. - * @returns Throws an error advising to use version 0 of the API. - */ - async search(query, params) { - throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0."); - } - /** - * Initiates a crawl job for a URL using the Firecrawl API. - * @param url - The URL to crawl. - * @param params - Additional parameters for the crawl request. - * @param pollInterval - Time in seconds for job status checks. - * @param idempotencyKey - Optional idempotency key for the request. - * @returns The response from the crawl operation. - */ - async crawlUrl(url, params, pollInterval = 2, idempotencyKey) { - const headers = this.prepareHeaders(idempotencyKey); - let jsonData = { url, ...params }; - try { - const response = await this.postRequest(this.apiUrl + `/v1/crawl`, jsonData, headers); - if (response.status === 200) { - const id = response.data.id; - return this.monitorJobStatus(id, headers, pollInterval); - } - else { - this.handleError(response, "start crawl job"); - } - } - catch (error) { - if (error.response?.data?.error) { - throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`); - } - else { - throw new Error(error.message); - } - } - return { success: false, error: "Internal server error." }; - } - async asyncCrawlUrl(url, params, idempotencyKey) { - const headers = this.prepareHeaders(idempotencyKey); - let jsonData = { url, ...params }; - try { - const response = await this.postRequest(this.apiUrl + `/v1/crawl`, jsonData, headers); - if (response.status === 200) { - return response.data; - } - else { - this.handleError(response, "start crawl job"); - } - } - catch (error) { - if (error.response?.data?.error) { - throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`); - } - else { - throw new Error(error.message); - } - } - return { success: false, error: "Internal server error." }; - } - /** - * Checks the status of a crawl job using the Firecrawl API. - * @param id - The ID of the crawl operation. - * @returns The response containing the job status. - */ - async checkCrawlStatus(id) { - if (!id) { - throw new Error("No crawl ID provided"); - } - const headers = this.prepareHeaders(); - try { - const response = await this.getRequest(`${this.apiUrl}/v1/crawl/${id}`, headers); - if (response.status === 200) { - return ({ - success: true, - status: response.data.status, - total: response.data.total, - completed: response.data.completed, - creditsUsed: response.data.creditsUsed, - expiresAt: new Date(response.data.expiresAt), - next: response.data.next, - data: response.data.data, - error: response.data.error - }); - } - else { - this.handleError(response, "check crawl status"); - } - } - catch (error) { - throw new Error(error.message); - } - return { success: false, error: "Internal server error." }; - } - async crawlUrlAndWatch(url, params, idempotencyKey) { - const crawl = await this.asyncCrawlUrl(url, params, idempotencyKey); - if (crawl.success && crawl.id) { - const id = crawl.id; - return new CrawlWatcher(id, this); - } - throw new Error("Crawl job failed to start"); - } - async mapUrl(url, params) { - const headers = this.prepareHeaders(); - let jsonData = { url, ...params }; - try { - const response = await this.postRequest(this.apiUrl + `/v1/map`, jsonData, headers); - if (response.status === 200) { - return response.data; - } - else { - this.handleError(response, "map"); - } - } - catch (error) { - throw new Error(error.message); - } - return { success: false, error: "Internal server error." }; - } - /** - * Prepares the headers for an API request. - * @param idempotencyKey - Optional key to ensure idempotency. - * @returns The prepared headers. - */ - prepareHeaders(idempotencyKey) { - return { - "Content-Type": "application/json", - Authorization: `Bearer ${this.apiKey}`, - ...(idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {}), - }; - } - /** - * Sends a POST request to the specified URL. - * @param url - The URL to send the request to. - * @param data - The data to send in the request. - * @param headers - The headers for the request. - * @returns The response from the POST request. - */ - postRequest(url, data, headers) { - return axios_1.default.post(url, data, { headers }); - } - /** - * Sends a GET request to the specified URL. - * @param url - The URL to send the request to. - * @param headers - The headers for the request. - * @returns The response from the GET request. - */ - getRequest(url, headers) { - return axios_1.default.get(url, { headers }); - } - /** - * Monitors the status of a crawl job until completion or failure. - * @param id - The ID of the crawl operation. - * @param headers - The headers for the request. - * @param checkInterval - Interval in seconds for job status checks. - * @param checkUrl - Optional URL to check the status (used for v1 API) - * @returns The final job status or data. - */ - async monitorJobStatus(id, headers, checkInterval) { - while (true) { - const statusResponse = await this.getRequest(`${this.apiUrl}/v1/crawl/${id}`, headers); - if (statusResponse.status === 200) { - const statusData = statusResponse.data; - if (statusData.status === "completed") { - if ("data" in statusData) { - return statusData; - } - else { - throw new Error("Crawl job completed but no data was returned"); - } - } - else if (["active", "paused", "pending", "queued", "scraping"].includes(statusData.status)) { - checkInterval = Math.max(checkInterval, 2); - await new Promise((resolve) => setTimeout(resolve, checkInterval * 1000)); - } - else { - throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`); - } - } - else { - this.handleError(statusResponse, "check crawl status"); - } - } - } - /** - * Handles errors from API responses. - * @param {AxiosResponse} response - The response from the API. - * @param {string} action - The action being performed when the error occurred. - */ - handleError(response, action) { - if ([402, 408, 409, 500].includes(response.status)) { - const errorMessage = response.data.error || "Unknown error occurred"; - throw new Error(`Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`); - } - else { - throw new Error(`Unexpected error occurred while trying to ${action}. Status code: ${response.status}`); - } - } -} -exports.default = FirecrawlApp; -class CrawlWatcher extends typescript_event_target_1.TypedEventTarget { - constructor(id, app) { - super(); - this.ws = new isows_1.WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey); - this.status = "scraping"; - this.data = []; - const messageHandler = (msg) => { - if (msg.type === "done") { - this.status = "completed"; - this.dispatchTypedEvent("done", new CustomEvent("done", { - detail: { - status: this.status, - data: this.data, - }, - })); - } - else if (msg.type === "error") { - this.status = "failed"; - this.dispatchTypedEvent("error", new CustomEvent("error", { - detail: { - status: this.status, - data: this.data, - error: msg.error, - }, - })); - } - else if (msg.type === "catchup") { - this.status = msg.data.status; - this.data.push(...(msg.data.data ?? [])); - for (const doc of this.data) { - this.dispatchTypedEvent("document", new CustomEvent("document", { - detail: doc, - })); - } - } - else if (msg.type === "document") { - this.dispatchTypedEvent("document", new CustomEvent("document", { - detail: msg.data, - })); - } - }; - this.ws.onmessage = ((ev) => { - if (typeof ev.data !== "string") { - this.ws.close(); - return; - } - const msg = JSON.parse(ev.data); - messageHandler(msg); - }).bind(this); - this.ws.onclose = ((ev) => { - const msg = JSON.parse(ev.reason); - messageHandler(msg); - }).bind(this); - this.ws.onerror = ((_) => { - this.status = "failed"; - this.dispatchTypedEvent("error", new CustomEvent("error", { - detail: { - status: this.status, - data: this.data, - error: "WebSocket error", - }, - })); - }).bind(this); - } - close() { - this.ws.close(); - } -} -exports.CrawlWatcher = CrawlWatcher; diff --git a/apps/js-sdk/firecrawl/build/cjs/package.json b/apps/js-sdk/firecrawl/build/cjs/package.json deleted file mode 100644 index b731bd61..00000000 --- a/apps/js-sdk/firecrawl/build/cjs/package.json +++ /dev/null @@ -1 +0,0 @@ -{"type": "commonjs"} diff --git a/apps/js-sdk/firecrawl/build/esm/index.js b/apps/js-sdk/firecrawl/build/esm/index.js deleted file mode 100644 index 4245cc37..00000000 --- a/apps/js-sdk/firecrawl/build/esm/index.js +++ /dev/null @@ -1,339 +0,0 @@ -import axios from "axios"; -import { zodToJsonSchema } from "zod-to-json-schema"; -import { WebSocket } from "isows"; -import { TypedEventTarget } from "typescript-event-target"; -/** - * Main class for interacting with the Firecrawl API. - * Provides methods for scraping, searching, crawling, and mapping web content. - */ -export default class FirecrawlApp { - /** - * Initializes a new instance of the FirecrawlApp class. - * @param config - Configuration options for the FirecrawlApp instance. - */ - constructor({ apiKey = null, apiUrl = null }) { - this.apiKey = apiKey || ""; - this.apiUrl = apiUrl || "https://api.firecrawl.dev"; - } - /** - * Scrapes a URL using the Firecrawl API. - * @param url - The URL to scrape. - * @param params - Additional parameters for the scrape request. - * @returns The response from the scrape operation. - */ - async scrapeUrl(url, params) { - const headers = { - "Content-Type": "application/json", - Authorization: `Bearer ${this.apiKey}`, - }; - let jsonData = { url, ...params }; - if (jsonData?.extract?.schema) { - let schema = jsonData.extract.schema; - // Try parsing the schema as a Zod schema - try { - schema = zodToJsonSchema(schema); - } - catch (error) { - } - jsonData = { - ...jsonData, - extract: { - ...jsonData.extract, - schema: schema, - }, - }; - } - try { - const response = await axios.post(this.apiUrl + `/v1/scrape`, jsonData, { headers }); - if (response.status === 200) { - const responseData = response.data; - if (responseData.success) { - return { - success: true, - warning: responseData.warning, - error: responseData.error, - ...responseData.data - }; - } - else { - throw new Error(`Failed to scrape URL. Error: ${responseData.error}`); - } - } - else { - this.handleError(response, "scrape URL"); - } - } - catch (error) { - throw new Error(error.message); - } - return { success: false, error: "Internal server error." }; - } - /** - * This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API. - * @param query - The search query string. - * @param params - Additional parameters for the search. - * @returns Throws an error advising to use version 0 of the API. - */ - async search(query, params) { - throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0."); - } - /** - * Initiates a crawl job for a URL using the Firecrawl API. - * @param url - The URL to crawl. - * @param params - Additional parameters for the crawl request. - * @param pollInterval - Time in seconds for job status checks. - * @param idempotencyKey - Optional idempotency key for the request. - * @returns The response from the crawl operation. - */ - async crawlUrl(url, params, pollInterval = 2, idempotencyKey) { - const headers = this.prepareHeaders(idempotencyKey); - let jsonData = { url, ...params }; - try { - const response = await this.postRequest(this.apiUrl + `/v1/crawl`, jsonData, headers); - if (response.status === 200) { - const id = response.data.id; - return this.monitorJobStatus(id, headers, pollInterval); - } - else { - this.handleError(response, "start crawl job"); - } - } - catch (error) { - if (error.response?.data?.error) { - throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`); - } - else { - throw new Error(error.message); - } - } - return { success: false, error: "Internal server error." }; - } - async asyncCrawlUrl(url, params, idempotencyKey) { - const headers = this.prepareHeaders(idempotencyKey); - let jsonData = { url, ...params }; - try { - const response = await this.postRequest(this.apiUrl + `/v1/crawl`, jsonData, headers); - if (response.status === 200) { - return response.data; - } - else { - this.handleError(response, "start crawl job"); - } - } - catch (error) { - if (error.response?.data?.error) { - throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`); - } - else { - throw new Error(error.message); - } - } - return { success: false, error: "Internal server error." }; - } - /** - * Checks the status of a crawl job using the Firecrawl API. - * @param id - The ID of the crawl operation. - * @returns The response containing the job status. - */ - async checkCrawlStatus(id) { - if (!id) { - throw new Error("No crawl ID provided"); - } - const headers = this.prepareHeaders(); - try { - const response = await this.getRequest(`${this.apiUrl}/v1/crawl/${id}`, headers); - if (response.status === 200) { - return ({ - success: true, - status: response.data.status, - total: response.data.total, - completed: response.data.completed, - creditsUsed: response.data.creditsUsed, - expiresAt: new Date(response.data.expiresAt), - next: response.data.next, - data: response.data.data, - error: response.data.error - }); - } - else { - this.handleError(response, "check crawl status"); - } - } - catch (error) { - throw new Error(error.message); - } - return { success: false, error: "Internal server error." }; - } - async crawlUrlAndWatch(url, params, idempotencyKey) { - const crawl = await this.asyncCrawlUrl(url, params, idempotencyKey); - if (crawl.success && crawl.id) { - const id = crawl.id; - return new CrawlWatcher(id, this); - } - throw new Error("Crawl job failed to start"); - } - async mapUrl(url, params) { - const headers = this.prepareHeaders(); - let jsonData = { url, ...params }; - try { - const response = await this.postRequest(this.apiUrl + `/v1/map`, jsonData, headers); - if (response.status === 200) { - return response.data; - } - else { - this.handleError(response, "map"); - } - } - catch (error) { - throw new Error(error.message); - } - return { success: false, error: "Internal server error." }; - } - /** - * Prepares the headers for an API request. - * @param idempotencyKey - Optional key to ensure idempotency. - * @returns The prepared headers. - */ - prepareHeaders(idempotencyKey) { - return { - "Content-Type": "application/json", - Authorization: `Bearer ${this.apiKey}`, - ...(idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {}), - }; - } - /** - * Sends a POST request to the specified URL. - * @param url - The URL to send the request to. - * @param data - The data to send in the request. - * @param headers - The headers for the request. - * @returns The response from the POST request. - */ - postRequest(url, data, headers) { - return axios.post(url, data, { headers }); - } - /** - * Sends a GET request to the specified URL. - * @param url - The URL to send the request to. - * @param headers - The headers for the request. - * @returns The response from the GET request. - */ - getRequest(url, headers) { - return axios.get(url, { headers }); - } - /** - * Monitors the status of a crawl job until completion or failure. - * @param id - The ID of the crawl operation. - * @param headers - The headers for the request. - * @param checkInterval - Interval in seconds for job status checks. - * @param checkUrl - Optional URL to check the status (used for v1 API) - * @returns The final job status or data. - */ - async monitorJobStatus(id, headers, checkInterval) { - while (true) { - const statusResponse = await this.getRequest(`${this.apiUrl}/v1/crawl/${id}`, headers); - if (statusResponse.status === 200) { - const statusData = statusResponse.data; - if (statusData.status === "completed") { - if ("data" in statusData) { - return statusData; - } - else { - throw new Error("Crawl job completed but no data was returned"); - } - } - else if (["active", "paused", "pending", "queued", "scraping"].includes(statusData.status)) { - checkInterval = Math.max(checkInterval, 2); - await new Promise((resolve) => setTimeout(resolve, checkInterval * 1000)); - } - else { - throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`); - } - } - else { - this.handleError(statusResponse, "check crawl status"); - } - } - } - /** - * Handles errors from API responses. - * @param {AxiosResponse} response - The response from the API. - * @param {string} action - The action being performed when the error occurred. - */ - handleError(response, action) { - if ([402, 408, 409, 500].includes(response.status)) { - const errorMessage = response.data.error || "Unknown error occurred"; - throw new Error(`Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`); - } - else { - throw new Error(`Unexpected error occurred while trying to ${action}. Status code: ${response.status}`); - } - } -} -export class CrawlWatcher extends TypedEventTarget { - constructor(id, app) { - super(); - this.ws = new WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey); - this.status = "scraping"; - this.data = []; - const messageHandler = (msg) => { - if (msg.type === "done") { - this.status = "completed"; - this.dispatchTypedEvent("done", new CustomEvent("done", { - detail: { - status: this.status, - data: this.data, - }, - })); - } - else if (msg.type === "error") { - this.status = "failed"; - this.dispatchTypedEvent("error", new CustomEvent("error", { - detail: { - status: this.status, - data: this.data, - error: msg.error, - }, - })); - } - else if (msg.type === "catchup") { - this.status = msg.data.status; - this.data.push(...(msg.data.data ?? [])); - for (const doc of this.data) { - this.dispatchTypedEvent("document", new CustomEvent("document", { - detail: doc, - })); - } - } - else if (msg.type === "document") { - this.dispatchTypedEvent("document", new CustomEvent("document", { - detail: msg.data, - })); - } - }; - this.ws.onmessage = ((ev) => { - if (typeof ev.data !== "string") { - this.ws.close(); - return; - } - const msg = JSON.parse(ev.data); - messageHandler(msg); - }).bind(this); - this.ws.onclose = ((ev) => { - const msg = JSON.parse(ev.reason); - messageHandler(msg); - }).bind(this); - this.ws.onerror = ((_) => { - this.status = "failed"; - this.dispatchTypedEvent("error", new CustomEvent("error", { - detail: { - status: this.status, - data: this.data, - error: "WebSocket error", - }, - })); - }).bind(this); - } - close() { - this.ws.close(); - } -} diff --git a/apps/js-sdk/firecrawl/build/esm/package.json b/apps/js-sdk/firecrawl/build/esm/package.json deleted file mode 100644 index 6990891f..00000000 --- a/apps/js-sdk/firecrawl/build/esm/package.json +++ /dev/null @@ -1 +0,0 @@ -{"type": "module"} diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json index e68b3014..430cffff 100644 --- a/apps/js-sdk/firecrawl/package.json +++ b/apps/js-sdk/firecrawl/package.json @@ -2,21 +2,11 @@ "name": "@mendable/firecrawl-js", "version": "1.2.1", "description": "JavaScript SDK for Firecrawl API", - "main": "build/cjs/index.js", - "types": "types/index.d.ts", + "main": "dist/index.js", + "types": "dist/index.d.ts", "type": "module", - "exports": { - "require": { - "types": "./types/index.d.ts", - "default": "./build/cjs/index.js" - }, - "import": { - "types": "./types/index.d.ts", - "default": "./build/esm/index.js" - } - }, "scripts": { - "build": "tsc --module commonjs --moduleResolution node10 --outDir build/cjs/ && echo '{\"type\": \"commonjs\"}' > build/cjs/package.json && npx tsc --module NodeNext --moduleResolution NodeNext --outDir build/esm/ && echo '{\"type\": \"module\"}' > build/esm/package.json", + "build": "tsc", "build-and-publish": "npm run build && npm publish --access public", "publish-beta": "npm run build && npm publish --access public --tag beta", "test": "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/v1/**/*.test.ts" diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index 1d1bb4ee..e9411527 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -1,4 +1,4 @@ -import axios, { AxiosResponse, AxiosRequestHeaders } from "axios"; +import axios, { type AxiosResponse, type AxiosRequestHeaders } from "axios"; import { z } from "zod"; import { zodToJsonSchema } from "zod-to-json-schema"; import { WebSocket } from "isows"; diff --git a/apps/js-sdk/firecrawl/types/index.d.ts b/apps/js-sdk/firecrawl/types/index.d.ts deleted file mode 100644 index 36356c4e..00000000 --- a/apps/js-sdk/firecrawl/types/index.d.ts +++ /dev/null @@ -1,260 +0,0 @@ -import { AxiosResponse, AxiosRequestHeaders } from "axios"; -import { z } from "zod"; -import { TypedEventTarget } from "typescript-event-target"; -/** - * Configuration interface for FirecrawlApp. - * @param apiKey - Optional API key for authentication. - * @param apiUrl - Optional base URL of the API; defaults to 'https://api.firecrawl.dev'. - */ -export interface FirecrawlAppConfig { - apiKey?: string | null; - apiUrl?: string | null; -} -/** - * Metadata for a Firecrawl document. - * Includes various optional properties for document metadata. - */ -export interface FirecrawlDocumentMetadata { - title?: string; - description?: string; - language?: string; - keywords?: string; - robots?: string; - ogTitle?: string; - ogDescription?: string; - ogUrl?: string; - ogImage?: string; - ogAudio?: string; - ogDeterminer?: string; - ogLocale?: string; - ogLocaleAlternate?: string[]; - ogSiteName?: string; - ogVideo?: string; - dctermsCreated?: string; - dcDateCreated?: string; - dcDate?: string; - dctermsType?: string; - dcType?: string; - dctermsAudience?: string; - dctermsSubject?: string; - dcSubject?: string; - dcDescription?: string; - dctermsKeywords?: string; - modifiedTime?: string; - publishedTime?: string; - articleTag?: string; - articleSection?: string; - sourceURL?: string; - statusCode?: number; - error?: string; - [key: string]: any; -} -/** - * Document interface for Firecrawl. - * Represents a document retrieved or processed by Firecrawl. - */ -export interface FirecrawlDocument { - url?: string; - markdown?: string; - html?: string; - rawHtml?: string; - links?: string[]; - extract?: Record; - screenshot?: string; - metadata?: FirecrawlDocumentMetadata; -} -/** - * Parameters for scraping operations. - * Defines the options and configurations available for scraping web content. - */ -export interface ScrapeParams { - formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "extract" | "full@scrennshot")[]; - headers?: Record; - includeTags?: string[]; - excludeTags?: string[]; - onlyMainContent?: boolean; - extract?: { - prompt?: string; - schema?: z.ZodSchema | any; - systemPrompt?: string; - }; - waitFor?: number; - timeout?: number; -} -/** - * Response interface for scraping operations. - * Defines the structure of the response received after a scraping operation. - */ -export interface ScrapeResponse extends FirecrawlDocument { - success: true; - warning?: string; - error?: string; -} -/** - * Parameters for crawling operations. - * Includes options for both scraping and mapping during a crawl. - */ -export interface CrawlParams { - includePaths?: string[]; - excludePaths?: string[]; - maxDepth?: number; - limit?: number; - allowBackwardLinks?: boolean; - allowExternalLinks?: boolean; - ignoreSitemap?: boolean; - scrapeOptions?: ScrapeParams; - webhook?: string; -} -/** - * Response interface for crawling operations. - * Defines the structure of the response received after initiating a crawl. - */ -export interface CrawlResponse { - id?: string; - url?: string; - success: true; - error?: string; -} -/** - * Response interface for job status checks. - * Provides detailed status of a crawl job including progress and results. - */ -export interface CrawlStatusResponse { - success: true; - total: number; - completed: number; - creditsUsed: number; - expiresAt: Date; - status: "scraping" | "completed" | "failed"; - next: string; - data?: FirecrawlDocument[]; - error?: string; -} -/** - * Parameters for mapping operations. - * Defines options for mapping URLs during a crawl. - */ -export interface MapParams { - search?: string; - ignoreSitemap?: boolean; - includeSubdomains?: boolean; - limit?: number; -} -/** - * Response interface for mapping operations. - * Defines the structure of the response received after a mapping operation. - */ -export interface MapResponse { - success: true; - links?: string[]; - error?: string; -} -/** - * Error response interface. - * Defines the structure of the response received when an error occurs. - */ -export interface ErrorResponse { - success: false; - error: string; -} -/** - * Main class for interacting with the Firecrawl API. - * Provides methods for scraping, searching, crawling, and mapping web content. - */ -export default class FirecrawlApp { - apiKey: string; - apiUrl: string; - /** - * Initializes a new instance of the FirecrawlApp class. - * @param config - Configuration options for the FirecrawlApp instance. - */ - constructor({ apiKey, apiUrl }: FirecrawlAppConfig); - /** - * Scrapes a URL using the Firecrawl API. - * @param url - The URL to scrape. - * @param params - Additional parameters for the scrape request. - * @returns The response from the scrape operation. - */ - scrapeUrl(url: string, params?: ScrapeParams): Promise; - /** - * This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API. - * @param query - The search query string. - * @param params - Additional parameters for the search. - * @returns Throws an error advising to use version 0 of the API. - */ - search(query: string, params?: any): Promise; - /** - * Initiates a crawl job for a URL using the Firecrawl API. - * @param url - The URL to crawl. - * @param params - Additional parameters for the crawl request. - * @param pollInterval - Time in seconds for job status checks. - * @param idempotencyKey - Optional idempotency key for the request. - * @returns The response from the crawl operation. - */ - crawlUrl(url: string, params?: CrawlParams, pollInterval?: number, idempotencyKey?: string): Promise; - asyncCrawlUrl(url: string, params?: CrawlParams, idempotencyKey?: string): Promise; - /** - * Checks the status of a crawl job using the Firecrawl API. - * @param id - The ID of the crawl operation. - * @returns The response containing the job status. - */ - checkCrawlStatus(id?: string): Promise; - crawlUrlAndWatch(url: string, params?: CrawlParams, idempotencyKey?: string): Promise; - mapUrl(url: string, params?: MapParams): Promise; - /** - * Prepares the headers for an API request. - * @param idempotencyKey - Optional key to ensure idempotency. - * @returns The prepared headers. - */ - prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders; - /** - * Sends a POST request to the specified URL. - * @param url - The URL to send the request to. - * @param data - The data to send in the request. - * @param headers - The headers for the request. - * @returns The response from the POST request. - */ - postRequest(url: string, data: any, headers: AxiosRequestHeaders): Promise; - /** - * Sends a GET request to the specified URL. - * @param url - The URL to send the request to. - * @param headers - The headers for the request. - * @returns The response from the GET request. - */ - getRequest(url: string, headers: AxiosRequestHeaders): Promise; - /** - * Monitors the status of a crawl job until completion or failure. - * @param id - The ID of the crawl operation. - * @param headers - The headers for the request. - * @param checkInterval - Interval in seconds for job status checks. - * @param checkUrl - Optional URL to check the status (used for v1 API) - * @returns The final job status or data. - */ - monitorJobStatus(id: string, headers: AxiosRequestHeaders, checkInterval: number): Promise; - /** - * Handles errors from API responses. - * @param {AxiosResponse} response - The response from the API. - * @param {string} action - The action being performed when the error occurred. - */ - handleError(response: AxiosResponse, action: string): void; -} -interface CrawlWatcherEvents { - document: CustomEvent; - done: CustomEvent<{ - status: CrawlStatusResponse["status"]; - data: FirecrawlDocument[]; - }>; - error: CustomEvent<{ - status: CrawlStatusResponse["status"]; - data: FirecrawlDocument[]; - error: string; - }>; -} -export declare class CrawlWatcher extends TypedEventTarget { - private ws; - data: FirecrawlDocument[]; - status: CrawlStatusResponse["status"]; - constructor(id: string, app: FirecrawlApp); - close(): void; -} -export {};