mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 11:42:24 +08:00
Nick: LLM extract support on Node SDK
This commit is contained in:
parent
45e33563eb
commit
a2881e9288
|
@ -35,7 +35,7 @@ class FirecrawlApp {
|
|||
Authorization: `Bearer ${this.apiKey}`,
|
||||
};
|
||||
let jsonData = { url, ...params };
|
||||
if (jsonData?.extractorOptions?.extractionSchema) {
|
||||
if (this.version === 'v0' && jsonData?.extractorOptions?.extractionSchema) {
|
||||
let schema = jsonData.extractorOptions.extractionSchema;
|
||||
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
||||
if (schema instanceof zod_1.z.ZodSchema) {
|
||||
|
@ -50,6 +50,20 @@ class FirecrawlApp {
|
|||
},
|
||||
};
|
||||
}
|
||||
else if (this.version === 'v1' && jsonData?.extract?.schema) {
|
||||
let schema = jsonData.extract.schema;
|
||||
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
||||
if (schema instanceof zod_1.z.ZodSchema) {
|
||||
schema = (0, zod_to_json_schema_1.zodToJsonSchema)(schema);
|
||||
}
|
||||
jsonData = {
|
||||
...jsonData,
|
||||
extract: {
|
||||
...jsonData.extract,
|
||||
schema: schema,
|
||||
},
|
||||
};
|
||||
}
|
||||
try {
|
||||
const response = await axios_1.default.post(this.apiUrl + `/${this.version}/scrape`, jsonData, { headers });
|
||||
if (response.status === 200) {
|
||||
|
|
|
@ -30,7 +30,7 @@ export default class FirecrawlApp {
|
|||
Authorization: `Bearer ${this.apiKey}`,
|
||||
};
|
||||
let jsonData = { url, ...params };
|
||||
if (jsonData?.extractorOptions?.extractionSchema) {
|
||||
if (this.version === 'v0' && jsonData?.extractorOptions?.extractionSchema) {
|
||||
let schema = jsonData.extractorOptions.extractionSchema;
|
||||
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
||||
if (schema instanceof z.ZodSchema) {
|
||||
|
@ -45,6 +45,20 @@ export default class FirecrawlApp {
|
|||
},
|
||||
};
|
||||
}
|
||||
else if (this.version === 'v1' && jsonData?.extract?.schema) {
|
||||
let schema = jsonData.extract.schema;
|
||||
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
||||
if (schema instanceof z.ZodSchema) {
|
||||
schema = zodToJsonSchema(schema);
|
||||
}
|
||||
jsonData = {
|
||||
...jsonData,
|
||||
extract: {
|
||||
...jsonData.extract,
|
||||
schema: schema,
|
||||
},
|
||||
};
|
||||
}
|
||||
try {
|
||||
const response = await axios.post(this.apiUrl + `/${this.version}/scrape`, jsonData, { headers });
|
||||
if (response.status === 200) {
|
||||
|
|
|
@ -106,7 +106,7 @@ export interface FirecrawlDocumentV0 {
|
|||
* Defines the options and configurations available for scraping web content.
|
||||
*/
|
||||
export interface ScrapeParams {
|
||||
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot")[];
|
||||
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "extract")[];
|
||||
headers?: Record<string, string>;
|
||||
includeTags?: string[];
|
||||
excludeTags?: string[];
|
||||
|
@ -114,6 +114,11 @@ export interface ScrapeParams {
|
|||
screenshotMode?: "desktop" | "full-desktop" | "mobile" | "full-mobile";
|
||||
waitFor?: number;
|
||||
timeout?: number;
|
||||
extract?: {
|
||||
prompt?: string;
|
||||
schema?: z.ZodSchema | any;
|
||||
systemPrompt?: string;
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -345,30 +350,36 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|||
Authorization: `Bearer ${this.apiKey}`,
|
||||
} as AxiosRequestHeaders;
|
||||
let jsonData: any = { url, ...params };
|
||||
if (jsonData?.extractorOptions?.extractionSchema || jsonData?.extract?.schema) {
|
||||
let schema = jsonData.extractorOptions?.extractionSchema || jsonData.extract?.schema;
|
||||
if (this.version === 'v0' && jsonData?.extractorOptions?.extractionSchema) {
|
||||
let schema = jsonData.extractorOptions.extractionSchema;
|
||||
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
||||
if (schema instanceof z.ZodSchema) {
|
||||
if (schema instanceof z.ZodSchema || schema instanceof z.ZodObject) {
|
||||
schema = zodToJsonSchema(schema);
|
||||
}
|
||||
if(this.version === 'v0') {
|
||||
jsonData = {
|
||||
...jsonData,
|
||||
extractorOptions: {
|
||||
...jsonData.extractorOptions,
|
||||
extractionSchema: schema,
|
||||
mode: jsonData.extractorOptions.mode || "llm-extraction",
|
||||
},
|
||||
};
|
||||
} else {
|
||||
jsonData = {
|
||||
...jsonData,
|
||||
extract: {
|
||||
...jsonData.extract,
|
||||
schema: schema,
|
||||
},
|
||||
};
|
||||
jsonData = {
|
||||
...jsonData,
|
||||
extractorOptions: {
|
||||
...jsonData.extractorOptions,
|
||||
extractionSchema: schema,
|
||||
mode: jsonData.extractorOptions.mode || "llm-extraction",
|
||||
},
|
||||
};
|
||||
} else if (this.version === 'v1' && jsonData?.extract?.schema) {
|
||||
let schema = jsonData.extract.schema;
|
||||
|
||||
// Try parsing the schema as a Zod schema
|
||||
try {
|
||||
schema = zodToJsonSchema(schema);
|
||||
} catch (error) {
|
||||
|
||||
}
|
||||
jsonData = {
|
||||
...jsonData,
|
||||
extract: {
|
||||
...jsonData.extract,
|
||||
schema: schema,
|
||||
},
|
||||
};
|
||||
}
|
||||
try {
|
||||
const response: AxiosResponse = await axios.post(
|
||||
|
|
8
apps/js-sdk/firecrawl/types/index.d.ts
vendored
8
apps/js-sdk/firecrawl/types/index.d.ts
vendored
|
@ -69,6 +69,7 @@ export interface FirecrawlDocument {
|
|||
html?: string;
|
||||
rawHtml?: string;
|
||||
links?: string[];
|
||||
extract?: Record<any, any>;
|
||||
screenshot?: string;
|
||||
metadata: FirecrawlDocumentMetadata;
|
||||
}
|
||||
|
@ -97,7 +98,7 @@ export interface FirecrawlDocumentV0 {
|
|||
* Defines the options and configurations available for scraping web content.
|
||||
*/
|
||||
export interface ScrapeParams {
|
||||
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot")[];
|
||||
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "extract")[];
|
||||
headers?: Record<string, string>;
|
||||
includeTags?: string[];
|
||||
excludeTags?: string[];
|
||||
|
@ -105,6 +106,11 @@ export interface ScrapeParams {
|
|||
screenshotMode?: "desktop" | "full-desktop" | "mobile" | "full-mobile";
|
||||
waitFor?: number;
|
||||
timeout?: number;
|
||||
extract?: {
|
||||
prompt?: string;
|
||||
schema?: z.ZodSchema | any;
|
||||
systemPrompt?: string;
|
||||
};
|
||||
}
|
||||
/**
|
||||
* Parameters for scraping operations on v0.
|
||||
|
|
Loading…
Reference in New Issue
Block a user