diff --git a/apps/js-sdk/firecrawl/build/cjs/index.js b/apps/js-sdk/firecrawl/build/cjs/index.js
index c6e93e00..7b0730f5 100644
--- a/apps/js-sdk/firecrawl/build/cjs/index.js
+++ b/apps/js-sdk/firecrawl/build/cjs/index.js
@@ -3,9 +3,12 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
+exports.CrawlWatcher = void 0;
 const axios_1 = __importDefault(require("axios"));
 const zod_1 = require("zod");
 const zod_to_json_schema_1 = require("zod-to-json-schema");
+const isows_1 = require("isows");
+const typescript_event_target_1 = require("typescript-event-target");
 /**
  * Main class for interacting with the Firecrawl API.
  * Provides methods for scraping, searching, crawling, and mapping web content.
@@ -15,13 +18,9 @@ class FirecrawlApp {
      * Initializes a new instance of the FirecrawlApp class.
      * @param config - Configuration options for the FirecrawlApp instance.
      */
-    constructor({ apiKey = null, apiUrl = null, version = "v1" }) {
+    constructor({ apiKey = null, apiUrl = null }) {
         this.apiKey = apiKey || "";
         this.apiUrl = apiUrl || "https://api.firecrawl.dev";
-        this.version = version;
-        if (!this.apiKey) {
-            throw new Error("No API key provided");
-        }
     }
     /**
      * Scrapes a URL using the Firecrawl API.
@@ -51,16 +50,16 @@ class FirecrawlApp {
             };
         }
         try {
-            const response = await axios_1.default.post(this.apiUrl + `/${this.version}/scrape`, jsonData, { headers });
+            const response = await axios_1.default.post(this.apiUrl + `/v1/scrape`, jsonData, { headers });
             if (response.status === 200) {
                 const responseData = response.data;
                 if (responseData.success) {
-                    return (this.version === 'v0' ? responseData : {
+                    return {
                         success: true,
                         warning: responseData.warning,
                         error: responseData.error,
                         ...responseData.data
-                    });
+                    };
                 }
                 else {
                     throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
@@ -76,80 +75,52 @@ class FirecrawlApp {
         return { success: false, error: "Internal server error." };
     }
     /**
-     * Searches for a query using the Firecrawl API.
-     * @param query - The query to search for.
-     * @param params - Additional parameters for the search request.
-     * @returns The response from the search operation.
+     * This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
+     * @param query - The search query string.
+     * @param params - Additional parameters for the search.
+     * @returns Throws an error advising to use version 0 of the API.
      */
     async search(query, params) {
-        if (this.version === "v1") {
-            throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
-        }
-        const headers = {
-            "Content-Type": "application/json",
-            Authorization: `Bearer ${this.apiKey}`,
-        };
-        let jsonData = { query };
-        if (params) {
-            jsonData = { ...jsonData, ...params };
-        }
-        try {
-            const response = await axios_1.default.post(this.apiUrl + "/v0/search", jsonData, { headers });
-            if (response.status === 200) {
-                const responseData = response.data;
-                if (responseData.success) {
-                    return responseData;
-                }
-                else {
-                    throw new Error(`Failed to search. Error: ${responseData.error}`);
-                }
-            }
-            else {
-                this.handleError(response, "search");
-            }
-        }
-        catch (error) {
-            throw new Error(error.message);
-        }
-        return { success: false, error: "Internal server error." };
+        throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
     }
     /**
      * Initiates a crawl job for a URL using the Firecrawl API.
      * @param url - The URL to crawl.
      * @param params - Additional parameters for the crawl request.
-     * @param waitUntilDone - Whether to wait for the crawl job to complete.
      * @param pollInterval - Time in seconds for job status checks.
      * @param idempotencyKey - Optional idempotency key for the request.
      * @returns The response from the crawl operation.
      */
-    async crawlUrl(url, params, waitUntilDone = true, pollInterval = 2, idempotencyKey) {
+    async crawlUrl(url, params, pollInterval = 2, idempotencyKey) {
         const headers = this.prepareHeaders(idempotencyKey);
         let jsonData = { url, ...params };
         try {
-            const response = await this.postRequest(this.apiUrl + `/${this.version}/crawl`, jsonData, headers);
+            const response = await this.postRequest(this.apiUrl + `/v1/crawl`, jsonData, headers);
             if (response.status === 200) {
-                const id = this.version === 'v0' ? response.data.jobId : response.data.id;
-                let checkUrl = undefined;
-                if (waitUntilDone) {
-                    if (this.version === 'v1') {
-                        checkUrl = response.data.url;
-                    }
-                    return this.monitorJobStatus(id, headers, pollInterval, checkUrl);
-                }
-                else {
-                    if (this.version === 'v0') {
-                        return {
-                            success: true,
-                            jobId: id
-                        };
-                    }
-                    else {
-                        return {
-                            success: true,
-                            id: id
-                        };
-                    }
-                }
+                const id = response.data.id;
+                return this.monitorJobStatus(id, headers, pollInterval);
+            }
+            else {
+                this.handleError(response, "start crawl job");
+            }
+        }
+        catch (error) {
+            if (error.response?.data?.error) {
+                throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`);
+            }
+            else {
+                throw new Error(error.message);
+            }
+        }
+        return { success: false, error: "Internal server error." };
+    }
+    async asyncCrawlUrl(url, params, idempotencyKey) {
+        const headers = this.prepareHeaders(idempotencyKey);
+        let jsonData = { url, ...params };
+        try {
+            const response = await this.postRequest(this.apiUrl + `/v1/crawl`, jsonData, headers);
+            if (response.status === 200) {
+                return response.data;
             }
             else {
                 this.handleError(response, "start crawl job");
@@ -176,37 +147,19 @@ class FirecrawlApp {
         }
         const headers = this.prepareHeaders();
         try {
-            const response = await this.getRequest(this.version === 'v1' ?
-                `${this.apiUrl}/${this.version}/crawl/${id}` :
-                `${this.apiUrl}/${this.version}/crawl/status/${id}`, headers);
+            const response = await this.getRequest(`${this.apiUrl}/v1/crawl/${id}`, headers);
             if (response.status === 200) {
-                if (this.version === 'v0') {
-                    return {
-                        success: true,
-                        status: response.data.status,
-                        current: response.data.current,
-                        current_url: response.data.current_url,
-                        current_step: response.data.current_step,
-                        total: response.data.total,
-                        data: response.data.data,
-                        partial_data: !response.data.data
-                            ? response.data.partial_data
-                            : undefined,
-                    };
-                }
-                else {
-                    return {
-                        success: true,
-                        status: response.data.status,
-                        total: response.data.total,
-                        completed: response.data.completed,
-                        creditsUsed: response.data.creditsUsed,
-                        expiresAt: new Date(response.data.expiresAt),
-                        next: response.data.next,
-                        data: response.data.data,
-                        error: response.data.error
-                    };
-                }
+                return ({
+                    success: true,
+                    status: response.data.status,
+                    total: response.data.total,
+                    completed: response.data.completed,
+                    creditsUsed: response.data.creditsUsed,
+                    expiresAt: new Date(response.data.expiresAt),
+                    next: response.data.next,
+                    data: response.data.data,
+                    error: response.data.error
+                });
             }
             else {
                 this.handleError(response, "check crawl status");
@@ -215,29 +168,21 @@ class FirecrawlApp {
         catch (error) {
             throw new Error(error.message);
         }
-        return this.version === 'v0' ?
-            {
-                success: false,
-                status: "unknown",
-                current: 0,
-                current_url: "",
-                current_step: "",
-                total: 0,
-                error: "Internal server error.",
-            } :
-            {
-                success: false,
-                error: "Internal server error.",
-            };
+        return { success: false, error: "Internal server error." };
+    }
+    async crawlUrlAndWatch(url, params, idempotencyKey) {
+        const crawl = await this.asyncCrawlUrl(url, params, idempotencyKey);
+        if (crawl.success && crawl.id) {
+            const id = crawl.id;
+            return new CrawlWatcher(id, this);
+        }
+        throw new Error("Crawl job failed to start");
     }
     async mapUrl(url, params) {
-        if (this.version == 'v0') {
-            throw new Error("Map is not supported in v0");
-        }
         const headers = this.prepareHeaders();
         let jsonData = { url, ...params };
         try {
-            const response = await this.postRequest(this.apiUrl + `/${this.version}/map`, jsonData, headers);
+            const response = await this.postRequest(this.apiUrl + `/v1/map`, jsonData, headers);
             if (response.status === 200) {
                 return response.data;
             }
@@ -289,21 +234,14 @@
      * @param checkUrl - Optional URL to check the status (used for v1 API)
      * @returns The final job status or data.
      */
-    async monitorJobStatus(id, headers, checkInterval, checkUrl) {
-        let apiUrl = '';
+    async monitorJobStatus(id, headers, checkInterval) {
         while (true) {
-            if (this.version === 'v1') {
-                apiUrl = checkUrl ?? `${this.apiUrl}/v1/crawl/${id}`;
-            }
-            else if (this.version === 'v0') {
-                apiUrl = `${this.apiUrl}/v0/crawl/status/${id}`;
-            }
-            const statusResponse = await this.getRequest(apiUrl, headers);
+            const statusResponse = await this.getRequest(`${this.apiUrl}/v1/crawl/${id}`, headers);
             if (statusResponse.status === 200) {
                 const statusData = statusResponse.data;
                 if (statusData.status === "completed") {
                     if ("data" in statusData) {
-                        return this.version === 'v0' ? statusData.data : statusData;
+                        return statusData;
                     }
                     else {
                         throw new Error("Crawl job completed but no data was returned");
@@ -338,3 +276,72 @@
     }
 }
 exports.default = FirecrawlApp;
+class CrawlWatcher extends typescript_event_target_1.TypedEventTarget {
+    constructor(id, app) {
+        super();
+        this.ws = new isows_1.WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
+        this.status = "scraping";
+        this.data = [];
+        const messageHandler = (msg) => {
+            if (msg.type === "done") {
+                this.status = "completed";
+                this.dispatchTypedEvent("done", new CustomEvent("done", {
+                    detail: {
+                        status: this.status,
+                        data: this.data,
+                    },
+                }));
+            }
+            else if (msg.type === "error") {
+                this.status = "failed";
+                this.dispatchTypedEvent("error", new CustomEvent("error", {
+                    detail: {
+                        status: this.status,
+                        data: this.data,
+                        error: msg.error,
+                    },
+                }));
+            }
+            else if (msg.type === "catchup") {
+                this.status = msg.data.status;
+                this.data.push(...(msg.data.data ?? []));
+                for (const doc of this.data) {
+                    this.dispatchTypedEvent("document", new CustomEvent("document", {
+                        detail: doc,
+                    }));
+                }
+            }
+            else if (msg.type === "document") {
+                this.dispatchTypedEvent("document", new CustomEvent("document", {
+                    detail: msg.data,
+                }));
+            }
+        };
+        this.ws.onmessage = ((ev) => {
+            if (typeof ev.data !== "string") {
+                this.ws.close();
+                return;
+            }
+            const msg = JSON.parse(ev.data);
+            messageHandler(msg);
+        }).bind(this);
+        this.ws.onclose = ((ev) => {
+            const msg = JSON.parse(ev.reason);
+            messageHandler(msg);
+        }).bind(this);
+        this.ws.onerror = ((_) => {
+            this.status = "failed";
+            this.dispatchTypedEvent("error", new CustomEvent("error", {
+                detail: {
+                    status: this.status,
+                    data: this.data,
+                    error: "WebSocket error",
+                },
+            }));
+        }).bind(this);
+    }
+    close() {
+        this.ws.close();
+    }
+}
+exports.CrawlWatcher = CrawlWatcher;
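Usage note (not part of the patch): with the v0 code paths removed above, the v1 crawl flow comes down to a blocking crawlUrl() plus an asyncCrawlUrl()/checkCrawlStatus() pair. The sketch below is a minimal illustration of that flow based only on the methods shown in this file; the API key, target URL, and crawl options are placeholder values and error handling is elided.

import FirecrawlApp from "@mendable/firecrawl-js";

// Placeholder credentials and URL; substitute real values.
const app = new FirecrawlApp({ apiKey: "fc-YOUR-API-KEY" });

// crawlUrl() now always polls until the job finishes and resolves with the
// full v1 status payload (status, total, completed, creditsUsed, data, ...).
const finished = await app.crawlUrl("https://example.com", { scrapeOptions: { formats: ["markdown"] } }, 2);
if (finished.status === "completed") {
    console.log(`crawl finished with ${finished.data.length} documents`);
}

// For manual control, start the job with asyncCrawlUrl() and poll on your own schedule.
const started = await app.asyncCrawlUrl("https://example.com");
if (started.success && started.id) {
    const status = await app.checkCrawlStatus(started.id);
    if (status.success) {
        console.log(`${status.completed}/${status.total} pages crawled (${status.status})`);
    }
}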
diff --git a/apps/js-sdk/firecrawl/build/esm/index.js b/apps/js-sdk/firecrawl/build/esm/index.js
index 3491a673..cccd1770 100644
--- a/apps/js-sdk/firecrawl/build/esm/index.js
+++ b/apps/js-sdk/firecrawl/build/esm/index.js
@@ -1,6 +1,8 @@
 import axios from "axios";
 import { z } from "zod";
 import { zodToJsonSchema } from "zod-to-json-schema";
+import { WebSocket } from "isows";
+import { TypedEventTarget } from "typescript-event-target";
 /**
  * Main class for interacting with the Firecrawl API.
  * Provides methods for scraping, searching, crawling, and mapping web content.
@@ -10,13 +12,9 @@ export default class FirecrawlApp {
      * Initializes a new instance of the FirecrawlApp class.
      * @param config - Configuration options for the FirecrawlApp instance.
      */
-    constructor({ apiKey = null, apiUrl = null, version = "v1" }) {
+    constructor({ apiKey = null, apiUrl = null }) {
         this.apiKey = apiKey || "";
         this.apiUrl = apiUrl || "https://api.firecrawl.dev";
-        this.version = version;
-        if (!this.apiKey) {
-            throw new Error("No API key provided");
-        }
     }
     /**
      * Scrapes a URL using the Firecrawl API.
@@ -46,16 +44,16 @@ export default class FirecrawlApp {
             };
         }
         try {
-            const response = await axios.post(this.apiUrl + `/${this.version}/scrape`, jsonData, { headers });
+            const response = await axios.post(this.apiUrl + `/v1/scrape`, jsonData, { headers });
             if (response.status === 200) {
                 const responseData = response.data;
                 if (responseData.success) {
-                    return (this.version === 'v0' ? responseData : {
+                    return {
                         success: true,
                         warning: responseData.warning,
                         error: responseData.error,
                         ...responseData.data
-                    });
+                    };
                 }
                 else {
                     throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
@@ -71,80 +69,52 @@ export default class FirecrawlApp {
         return { success: false, error: "Internal server error." };
     }
     /**
-     * Searches for a query using the Firecrawl API.
-     * @param query - The query to search for.
-     * @param params - Additional parameters for the search request.
-     * @returns The response from the search operation.
+     * This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
+     * @param query - The search query string.
+     * @param params - Additional parameters for the search.
+     * @returns Throws an error advising to use version 0 of the API.
      */
     async search(query, params) {
-        if (this.version === "v1") {
-            throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
-        }
-        const headers = {
-            "Content-Type": "application/json",
-            Authorization: `Bearer ${this.apiKey}`,
-        };
-        let jsonData = { query };
-        if (params) {
-            jsonData = { ...jsonData, ...params };
-        }
-        try {
-            const response = await axios.post(this.apiUrl + "/v0/search", jsonData, { headers });
-            if (response.status === 200) {
-                const responseData = response.data;
-                if (responseData.success) {
-                    return responseData;
-                }
-                else {
-                    throw new Error(`Failed to search. Error: ${responseData.error}`);
-                }
-            }
-            else {
-                this.handleError(response, "search");
-            }
-        }
-        catch (error) {
-            throw new Error(error.message);
-        }
-        return { success: false, error: "Internal server error." };
+        throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
     }
     /**
      * Initiates a crawl job for a URL using the Firecrawl API.
      * @param url - The URL to crawl.
      * @param params - Additional parameters for the crawl request.
-     * @param waitUntilDone - Whether to wait for the crawl job to complete.
      * @param pollInterval - Time in seconds for job status checks.
      * @param idempotencyKey - Optional idempotency key for the request.
      * @returns The response from the crawl operation.
      */
-    async crawlUrl(url, params, waitUntilDone = true, pollInterval = 2, idempotencyKey) {
+    async crawlUrl(url, params, pollInterval = 2, idempotencyKey) {
         const headers = this.prepareHeaders(idempotencyKey);
         let jsonData = { url, ...params };
         try {
-            const response = await this.postRequest(this.apiUrl + `/${this.version}/crawl`, jsonData, headers);
+            const response = await this.postRequest(this.apiUrl + `/v1/crawl`, jsonData, headers);
             if (response.status === 200) {
-                const id = this.version === 'v0' ? response.data.jobId : response.data.id;
-                let checkUrl = undefined;
-                if (waitUntilDone) {
-                    if (this.version === 'v1') {
-                        checkUrl = response.data.url;
-                    }
-                    return this.monitorJobStatus(id, headers, pollInterval, checkUrl);
-                }
-                else {
-                    if (this.version === 'v0') {
-                        return {
-                            success: true,
-                            jobId: id
-                        };
-                    }
-                    else {
-                        return {
-                            success: true,
-                            id: id
-                        };
-                    }
-                }
+                const id = response.data.id;
+                return this.monitorJobStatus(id, headers, pollInterval);
+            }
+            else {
+                this.handleError(response, "start crawl job");
+            }
+        }
+        catch (error) {
+            if (error.response?.data?.error) {
+                throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`);
+            }
+            else {
+                throw new Error(error.message);
+            }
+        }
+        return { success: false, error: "Internal server error." };
+    }
+    async asyncCrawlUrl(url, params, idempotencyKey) {
+        const headers = this.prepareHeaders(idempotencyKey);
+        let jsonData = { url, ...params };
+        try {
+            const response = await this.postRequest(this.apiUrl + `/v1/crawl`, jsonData, headers);
+            if (response.status === 200) {
+                return response.data;
             }
             else {
                 this.handleError(response, "start crawl job");
@@ -171,37 +141,19 @@ export default class FirecrawlApp {
         }
         const headers = this.prepareHeaders();
         try {
-            const response = await this.getRequest(this.version === 'v1' ?
-                `${this.apiUrl}/${this.version}/crawl/${id}` :
-                `${this.apiUrl}/${this.version}/crawl/status/${id}`, headers);
+            const response = await this.getRequest(`${this.apiUrl}/v1/crawl/${id}`, headers);
             if (response.status === 200) {
-                if (this.version === 'v0') {
-                    return {
-                        success: true,
-                        status: response.data.status,
-                        current: response.data.current,
-                        current_url: response.data.current_url,
-                        current_step: response.data.current_step,
-                        total: response.data.total,
-                        data: response.data.data,
-                        partial_data: !response.data.data
-                            ? response.data.partial_data
-                            : undefined,
-                    };
-                }
-                else {
-                    return {
-                        success: true,
-                        status: response.data.status,
-                        total: response.data.total,
-                        completed: response.data.completed,
-                        creditsUsed: response.data.creditsUsed,
-                        expiresAt: new Date(response.data.expiresAt),
-                        next: response.data.next,
-                        data: response.data.data,
-                        error: response.data.error
-                    };
-                }
+                return ({
+                    success: true,
+                    status: response.data.status,
+                    total: response.data.total,
+                    completed: response.data.completed,
+                    creditsUsed: response.data.creditsUsed,
+                    expiresAt: new Date(response.data.expiresAt),
+                    next: response.data.next,
+                    data: response.data.data,
+                    error: response.data.error
+                });
             }
             else {
                 this.handleError(response, "check crawl status");
@@ -210,29 +162,21 @@ export default class FirecrawlApp {
         catch (error) {
             throw new Error(error.message);
         }
-        return this.version === 'v0' ?
-            {
-                success: false,
-                status: "unknown",
-                current: 0,
-                current_url: "",
-                current_step: "",
-                total: 0,
-                error: "Internal server error.",
-            } :
-            {
-                success: false,
-                error: "Internal server error.",
-            };
+        return { success: false, error: "Internal server error." };
+    }
+    async crawlUrlAndWatch(url, params, idempotencyKey) {
+        const crawl = await this.asyncCrawlUrl(url, params, idempotencyKey);
+        if (crawl.success && crawl.id) {
+            const id = crawl.id;
+            return new CrawlWatcher(id, this);
+        }
+        throw new Error("Crawl job failed to start");
    }
     async mapUrl(url, params) {
-        if (this.version == 'v0') {
-            throw new Error("Map is not supported in v0");
-        }
         const headers = this.prepareHeaders();
         let jsonData = { url, ...params };
         try {
-            const response = await this.postRequest(this.apiUrl + `/${this.version}/map`, jsonData, headers);
+            const response = await this.postRequest(this.apiUrl + `/v1/map`, jsonData, headers);
             if (response.status === 200) {
                 return response.data;
             }
@@ -284,21 +228,14 @@
      * @param checkUrl - Optional URL to check the status (used for v1 API)
      * @returns The final job status or data.
      */
-    async monitorJobStatus(id, headers, checkInterval, checkUrl) {
-        let apiUrl = '';
+    async monitorJobStatus(id, headers, checkInterval) {
         while (true) {
-            if (this.version === 'v1') {
-                apiUrl = checkUrl ?? `${this.apiUrl}/v1/crawl/${id}`;
-            }
-            else if (this.version === 'v0') {
-                apiUrl = `${this.apiUrl}/v0/crawl/status/${id}`;
-            }
-            const statusResponse = await this.getRequest(apiUrl, headers);
+            const statusResponse = await this.getRequest(`${this.apiUrl}/v1/crawl/${id}`, headers);
             if (statusResponse.status === 200) {
                 const statusData = statusResponse.data;
                 if (statusData.status === "completed") {
                     if ("data" in statusData) {
-                        return this.version === 'v0' ? statusData.data : statusData;
+                        return statusData;
                     }
                     else {
                         throw new Error("Crawl job completed but no data was returned");
@@ -332,3 +269,71 @@
         }
     }
 }
+export class CrawlWatcher extends TypedEventTarget {
+    constructor(id, app) {
+        super();
+        this.ws = new WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
+        this.status = "scraping";
+        this.data = [];
+        const messageHandler = (msg) => {
+            if (msg.type === "done") {
+                this.status = "completed";
+                this.dispatchTypedEvent("done", new CustomEvent("done", {
+                    detail: {
+                        status: this.status,
+                        data: this.data,
+                    },
+                }));
+            }
+            else if (msg.type === "error") {
+                this.status = "failed";
+                this.dispatchTypedEvent("error", new CustomEvent("error", {
+                    detail: {
+                        status: this.status,
+                        data: this.data,
+                        error: msg.error,
+                    },
+                }));
+            }
+            else if (msg.type === "catchup") {
+                this.status = msg.data.status;
+                this.data.push(...(msg.data.data ?? []));
+                for (const doc of this.data) {
+                    this.dispatchTypedEvent("document", new CustomEvent("document", {
+                        detail: doc,
+                    }));
+                }
+            }
+            else if (msg.type === "document") {
+                this.dispatchTypedEvent("document", new CustomEvent("document", {
+                    detail: msg.data,
+                }));
+            }
+        };
+        this.ws.onmessage = ((ev) => {
+            if (typeof ev.data !== "string") {
+                this.ws.close();
+                return;
+            }
+            const msg = JSON.parse(ev.data);
+            messageHandler(msg);
+        }).bind(this);
+        this.ws.onclose = ((ev) => {
+            const msg = JSON.parse(ev.reason);
+            messageHandler(msg);
+        }).bind(this);
+        this.ws.onerror = ((_) => {
+            this.status = "failed";
+            this.dispatchTypedEvent("error", new CustomEvent("error", {
+                detail: {
+                    status: this.status,
+                    data: this.data,
+                    error: "WebSocket error",
+                },
+            }));
+        }).bind(this);
+    }
+    close() {
+        this.ws.close();
+    }
+}
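Usage note (not part of the patch): the CrawlWatcher class added to both builds above streams crawl progress over a WebSocket and re-emits it as typed events. Below is a minimal sketch of how it might be consumed; the API key and target URL are placeholders, and the event payload shapes follow the CrawlWatcherEvents declarations that appear later in this diff (types/index.d.ts).

import FirecrawlApp from "@mendable/firecrawl-js";

const app = new FirecrawlApp({ apiKey: "fc-YOUR-API-KEY" }); // placeholder key

// crawlUrlAndWatch() starts the job via asyncCrawlUrl() and resolves with a CrawlWatcher.
const watcher = await app.crawlUrlAndWatch("https://example.com", { scrapeOptions: { formats: ["markdown"] } });

// CrawlWatcher extends TypedEventTarget, so the standard addEventListener API applies.
watcher.addEventListener("document", (ev) => {
    console.log("received document", ev.detail);
});

watcher.addEventListener("done", (ev) => {
    console.log(`crawl completed with ${ev.detail.data.length} documents`);
});

watcher.addEventListener("error", (ev) => {
    console.error("crawl failed:", ev.detail.error);
    watcher.close(); // stop listening and close the underlying WebSocket
});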
diff --git a/apps/js-sdk/firecrawl/package-lock.json b/apps/js-sdk/firecrawl/package-lock.json
index 7f25babc..ce6a1a4a 100644
--- a/apps/js-sdk/firecrawl/package-lock.json
+++ b/apps/js-sdk/firecrawl/package-lock.json
@@ -1,12 +1,12 @@
 {
   "name": "@mendable/firecrawl-js",
-  "version": "1.0.3",
+  "version": "1.1.0",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "@mendable/firecrawl-js",
-      "version": "1.0.3",
+      "version": "1.1.0",
       "license": "MIT",
       "dependencies": {
         "axios": "^1.6.8",
diff --git a/apps/js-sdk/firecrawl/types/index.d.ts b/apps/js-sdk/firecrawl/types/index.d.ts
index 3ca10744..6b5166b3 100644
--- a/apps/js-sdk/firecrawl/types/index.d.ts
+++ b/apps/js-sdk/firecrawl/types/index.d.ts
@@ -1,15 +1,13 @@
 import { AxiosResponse, AxiosRequestHeaders } from "axios";
-import { z } from "zod";
+import { TypedEventTarget } from "typescript-event-target";
 /**
  * Configuration interface for FirecrawlApp.
  * @param apiKey - Optional API key for authentication.
  * @param apiUrl - Optional base URL of the API; defaults to 'https://api.firecrawl.dev'.
- * @param version - API version, either 'v0' or 'v1'.
 */
 export interface FirecrawlAppConfig {
     apiKey?: string | null;
     apiUrl?: string | null;
-    version?: "v0" | "v1";
 }
 /**
  * Metadata for a Firecrawl document.
@@ -50,15 +48,6 @@ export interface FirecrawlDocumentMetadata {
     error?: string;
     [key: string]: any;
 }
-/**
- * Metadata for a Firecrawl document on v0.
- * Similar to FirecrawlDocumentMetadata but includes properties specific to API version v0.
- */
-export interface FirecrawlDocumentMetadataV0 {
-    pageStatusCode?: number;
-    pageError?: string;
-    [key: string]: any;
-}
 /**
  * Document interface for Firecrawl.
  * Represents a document retrieved or processed by Firecrawl.
@@ -70,84 +59,30 @@ export interface FirecrawlDocument {
     rawHtml?: string;
     links?: string[];
     screenshot?: string;
-    metadata: FirecrawlDocumentMetadata;
-}
-/**
- * Document interface for Firecrawl on v0.
- * Represents a document specifically for API version v0 with additional properties.
- */
-export interface FirecrawlDocumentV0 {
-    id?: string;
-    url?: string;
-    content: string;
-    markdown?: string;
-    html?: string;
-    llm_extraction?: Record;
-    createdAt?: Date;
-    updatedAt?: Date;
-    type?: string;
-    metadata: FirecrawlDocumentMetadataV0;
-    childrenLinks?: string[];
-    provider?: string;
-    warning?: string;
-    index?: number;
+    metadata?: FirecrawlDocumentMetadata;
 }
 /**
  * Parameters for scraping operations.
  * Defines the options and configurations available for scraping web content.
  */
 export interface ScrapeParams {
-    formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot")[];
+    formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "full@scrennshot")[];
     headers?: Record;
     includeTags?: string[];
     excludeTags?: string[];
     onlyMainContent?: boolean;
-    screenshotMode?: "desktop" | "full-desktop" | "mobile" | "full-mobile";
     waitFor?: number;
     timeout?: number;
 }
-/**
- * Parameters for scraping operations on v0.
- * Includes page and extractor options specific to API version v0.
- */
-export interface ScrapeParamsV0 {
-    pageOptions?: {
-        headers?: Record;
-        includeHtml?: boolean;
-        includeRawHtml?: boolean;
-        onlyIncludeTags?: string[];
-        onlyMainContent?: boolean;
-        removeTags?: string[];
-        replaceAllPathsWithAbsolutePaths?: boolean;
-        screenshot?: boolean;
-        fullPageScreenshot?: boolean;
-        waitFor?: number;
-    };
-    extractorOptions?: {
-        mode?: "markdown" | "llm-extraction" | "llm-extraction-from-raw-html" | "llm-extraction-from-markdown";
-        extractionPrompt?: string;
-        extractionSchema?: Record | z.ZodSchema | any;
-    };
-    timeout?: number;
-}
 /**
  * Response interface for scraping operations.
  * Defines the structure of the response received after a scraping operation.
  */
 export interface ScrapeResponse extends FirecrawlDocument {
-    success: boolean;
+    success: true;
     warning?: string;
     error?: string;
 }
-/**
- * Response interface for scraping operations on v0.
- * Similar to ScrapeResponse but tailored for responses from API version v0.
- */
-export interface ScrapeResponseV0 {
-    success: boolean;
-    data?: FirecrawlDocumentV0;
-    error?: string;
-}
 /**
  * Parameters for crawling operations.
  * Includes options for both scraping and mapping during a crawl.
@@ -162,36 +97,6 @@ export interface CrawlParams {
     ignoreSitemap?: boolean;
     scrapeOptions?: ScrapeParams;
 }
-/**
- * Parameters for crawling operations on v0.
- * Tailored for API version v0, includes specific options for crawling.
- */
-export interface CrawlParamsV0 {
-    crawlerOptions?: {
-        includes?: string[];
-        excludes?: string[];
-        generateImgAltText?: boolean;
-        returnOnlyUrls?: boolean;
-        maxDepth?: number;
-        mode?: "default" | "fast";
-        ignoreSitemap?: boolean;
-        limit?: number;
-        allowBackwardCrawling?: boolean;
-        allowExternalContentLinks?: boolean;
-    };
-    pageOptions?: {
-        headers?: Record;
-        includeHtml?: boolean;
-        includeRawHtml?: boolean;
-        onlyIncludeTags?: string[];
-        onlyMainContent?: boolean;
-        removeTags?: string[];
-        replaceAllPathsWithAbsolutePaths?: boolean;
-        screenshot?: boolean;
-        fullPageScreenshot?: boolean;
-        waitFor?: number;
-    };
-}
 /**
  * Response interface for crawling operations.
  * Defines the structure of the response received after initiating a crawl.
@@ -199,16 +104,7 @@ export interface CrawlParamsV0 {
 export interface CrawlResponse {
     id?: string;
     url?: string;
-    success: boolean;
-    error?: string;
-}
-/**
- * Response interface for crawling operations on v0.
- * Similar to CrawlResponse but tailored for responses from API version v0.
- */
-export interface CrawlResponseV0 {
-    jobId?: string;
-    success: boolean;
+    success: true;
     error?: string;
 }
 /**
@@ -216,7 +112,7 @@ export interface CrawlResponseV0 {
  * Provides detailed status of a crawl job including progress and results.
  */
 export interface CrawlStatusResponse {
-    success: boolean;
+    success: true;
     total: number;
     completed: number;
     creditsUsed: number;
@@ -226,21 +122,6 @@
     data?: FirecrawlDocument[];
     error?: string;
 }
-/**
- * Response interface for job status checks on v0.
- * Tailored for API version v0, provides status and partial data of a crawl job.
- */
-export interface CrawlStatusResponseV0 {
-    success: boolean;
-    status: string;
-    current?: number;
-    current_url?: string;
-    current_step?: string;
-    total?: number;
-    data?: FirecrawlDocumentV0[];
-    partial_data?: FirecrawlDocumentV0[];
-    error?: string;
-}
 /**
  * Parameters for mapping operations.
  * Defines options for mapping URLs during a crawl.
@@ -256,78 +137,62 @@ export interface MapParams {
  * Defines the structure of the response received after a mapping operation.
  */
 export interface MapResponse {
-    success: boolean;
+    success: true;
     links?: string[];
     error?: string;
 }
 /**
- * Parameters for searching operations on v0.
- * Tailored for API version v0, includes specific options for searching content.
+ * Error response interface.
+ * Defines the structure of the response received when an error occurs.
  */
-export interface SearchParamsV0 {
-    pageOptions?: {
-        onlyMainContent?: boolean;
-        fetchPageContent?: boolean;
-        includeHtml?: boolean;
-        includeRawHtml?: boolean;
-    };
-    searchOptions?: {
-        limit?: number;
-    };
-}
-/**
- * Response interface for searching operations on v0.
- * Defines the structure of the response received after a search operation on v0.
- */
-export interface SearchResponseV0 {
-    success: boolean;
-    data?: FirecrawlDocumentV0[];
-    error?: string;
+export interface ErrorResponse {
+    success: false;
+    error: string;
 }
 /**
  * Main class for interacting with the Firecrawl API.
  * Provides methods for scraping, searching, crawling, and mapping web content.
  */
-export default class FirecrawlApp {
-    private apiKey;
-    private apiUrl;
-    version: T;
+export default class FirecrawlApp {
+    apiKey: string;
+    apiUrl: string;
     /**
      * Initializes a new instance of the FirecrawlApp class.
      * @param config - Configuration options for the FirecrawlApp instance.
      */
-    constructor({ apiKey, apiUrl, version }: FirecrawlAppConfig);
+    constructor({ apiKey, apiUrl }: FirecrawlAppConfig);
     /**
      * Scrapes a URL using the Firecrawl API.
      * @param url - The URL to scrape.
      * @param params - Additional parameters for the scrape request.
      * @returns The response from the scrape operation.
      */
-    scrapeUrl(url: string, params?: ScrapeParams | ScrapeParamsV0): Promise;
+    scrapeUrl(url: string, params?: ScrapeParams): Promise;
     /**
-     * Searches for a query using the Firecrawl API.
-     * @param query - The query to search for.
-     * @param params - Additional parameters for the search request.
-     * @returns The response from the search operation.
+     * This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
+     * @param query - The search query string.
+     * @param params - Additional parameters for the search.
+     * @returns Throws an error advising to use version 0 of the API.
      */
-    search(query: string, params?: SearchParamsV0): Promise;
+    search(query: string, params?: any): Promise;
     /**
      * Initiates a crawl job for a URL using the Firecrawl API.
      * @param url - The URL to crawl.
      * @param params - Additional parameters for the crawl request.
-     * @param waitUntilDone - Whether to wait for the crawl job to complete.
     * @param pollInterval - Time in seconds for job status checks.
      * @param idempotencyKey - Optional idempotency key for the request.
      * @returns The response from the crawl operation.
      */
-    crawlUrl(url: string, params?: this['version'] extends 'v0' ? CrawlParamsV0 : CrawlParams, waitUntilDone?: boolean, pollInterval?: number, idempotencyKey?: string): Promise;
+    crawlUrl(url: string, params?: CrawlParams, pollInterval?: number, idempotencyKey?: string): Promise;
+    asyncCrawlUrl(url: string, params?: CrawlParams, idempotencyKey?: string): Promise;
     /**
      * Checks the status of a crawl job using the Firecrawl API.
      * @param id - The ID of the crawl operation.
      * @returns The response containing the job status.
      */
-    checkCrawlStatus(id?: string): Promise;
-    mapUrl(url: string, params?: MapParams): Promise;
+    checkCrawlStatus(id?: string): Promise;
+    crawlUrlAndWatch(url: string, params?: CrawlParams, idempotencyKey?: string): Promise;
+    mapUrl(url: string, params?: MapParams): Promise;
     /**
      * Prepares the headers for an API request.
      * @param idempotencyKey - Optional key to ensure idempotency.
@@ -357,7 +222,7 @@ export default class FirecrawlApp {
      * @param checkUrl - Optional URL to check the status (used for v1 API)
      * @returns The final job status or data.
      */
-    monitorJobStatus(id: string, headers: AxiosRequestHeaders, checkInterval: number, checkUrl?: string): Promise;
+    monitorJobStatus(id: string, headers: AxiosRequestHeaders, checkInterval: number): Promise;
     /**
      * Handles errors from API responses.
      * @param {AxiosResponse} response - The response from the API.
@@ -365,3 +230,23 @@ export default class FirecrawlApp {
      */
     handleError(response: AxiosResponse, action: string): void;
 }
+interface CrawlWatcherEvents {
+    document: CustomEvent;
+    done: CustomEvent<{
+        status: CrawlStatusResponse["status"];
+        data: FirecrawlDocument[];
+    }>;
+    error: CustomEvent<{
+        status: CrawlStatusResponse["status"];
+        data: FirecrawlDocument[];
+        error: string;
+    }>;
+}
+export declare class CrawlWatcher extends TypedEventTarget {
+    private ws;
+    data: FirecrawlDocument[];
+    status: CrawlStatusResponse["status"];
+    constructor(id: string, app: FirecrawlApp);
+    close(): void;
+}
+export {};
diff --git a/apps/python-sdk/firecrawl/__init__.py b/apps/python-sdk/firecrawl/__init__.py
index 229f9ccd..13df20d9 100644
--- a/apps/python-sdk/firecrawl/__init__.py
+++ b/apps/python-sdk/firecrawl/__init__.py
@@ -13,7 +13,7 @@ import os

 from .firecrawl import FirecrawlApp

-__version__ = "1.0.1"
+__version__ = "1.1.1"

 # Define the logger for the Firecrawl project
 logger: logging.Logger = logging.getLogger("firecrawl")
diff --git a/apps/python-sdk/pyproject.toml b/apps/python-sdk/pyproject.toml
index 0a732c43..969fb051 100644
--- a/apps/python-sdk/pyproject.toml
+++ b/apps/python-sdk/pyproject.toml
@@ -10,6 +10,10 @@ readme = {file="README.md", content-type = "text/markdown"}
 requires-python = ">=3.8"
 dependencies = [
     "requests",
+    "python-dotenv",
+    "websockets",
+    "asyncio",
+"nest-asyncio"
 ]
 authors = [{name = "Mendable.ai",email = "nick@mendable.ai"}]
 maintainers = [{name = "Mendable.ai",email = "nick@mendable.ai"}]
diff --git a/apps/python-sdk/setup.py b/apps/python-sdk/setup.py
index 4978559b..8a67d1fd 100644
--- a/apps/python-sdk/setup.py
+++ b/apps/python-sdk/setup.py
@@ -30,6 +30,9 @@ setup(
         'requests',
         'pytest',
         'python-dotenv',
+        'websockets',
+        'asyncio',
+        'nest-asyncio'
     ],
     python_requires=">=3.8",
     classifiers=[