Nick: LLM extract support on Node SDK

This commit is contained in:
Nicolas 2024-08-30 13:43:19 -03:00
parent 45e33563eb
commit a2881e9288
4 changed files with 69 additions and 24 deletions

View File

@ -35,7 +35,7 @@ class FirecrawlApp {
Authorization: `Bearer ${this.apiKey}`,
};
let jsonData = { url, ...params };
if (jsonData?.extractorOptions?.extractionSchema) {
if (this.version === 'v0' && jsonData?.extractorOptions?.extractionSchema) {
let schema = jsonData.extractorOptions.extractionSchema;
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
if (schema instanceof zod_1.z.ZodSchema) {
@ -50,6 +50,20 @@ class FirecrawlApp {
},
};
}
else if (this.version === 'v1' && jsonData?.extract?.schema) {
let schema = jsonData.extract.schema;
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
if (schema instanceof zod_1.z.ZodSchema) {
schema = (0, zod_to_json_schema_1.zodToJsonSchema)(schema);
}
jsonData = {
...jsonData,
extract: {
...jsonData.extract,
schema: schema,
},
};
}
try {
const response = await axios_1.default.post(this.apiUrl + `/${this.version}/scrape`, jsonData, { headers });
if (response.status === 200) {

View File

@ -30,7 +30,7 @@ export default class FirecrawlApp {
Authorization: `Bearer ${this.apiKey}`,
};
let jsonData = { url, ...params };
if (jsonData?.extractorOptions?.extractionSchema) {
if (this.version === 'v0' && jsonData?.extractorOptions?.extractionSchema) {
let schema = jsonData.extractorOptions.extractionSchema;
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
if (schema instanceof z.ZodSchema) {
@ -45,6 +45,20 @@ export default class FirecrawlApp {
},
};
}
else if (this.version === 'v1' && jsonData?.extract?.schema) {
let schema = jsonData.extract.schema;
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
if (schema instanceof z.ZodSchema) {
schema = zodToJsonSchema(schema);
}
jsonData = {
...jsonData,
extract: {
...jsonData.extract,
schema: schema,
},
};
}
try {
const response = await axios.post(this.apiUrl + `/${this.version}/scrape`, jsonData, { headers });
if (response.status === 200) {

View File

@ -106,7 +106,7 @@ export interface FirecrawlDocumentV0 {
* Defines the options and configurations available for scraping web content.
*/
export interface ScrapeParams {
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot")[];
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "extract")[];
headers?: Record<string, string>;
includeTags?: string[];
excludeTags?: string[];
@ -114,6 +114,11 @@ export interface ScrapeParams {
screenshotMode?: "desktop" | "full-desktop" | "mobile" | "full-mobile";
waitFor?: number;
timeout?: number;
extract?: {
prompt?: string;
schema?: z.ZodSchema | any;
systemPrompt?: string;
};
}
/**
@ -345,30 +350,36 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
Authorization: `Bearer ${this.apiKey}`,
} as AxiosRequestHeaders;
let jsonData: any = { url, ...params };
if (jsonData?.extractorOptions?.extractionSchema || jsonData?.extract?.schema) {
let schema = jsonData.extractorOptions?.extractionSchema || jsonData.extract?.schema;
if (this.version === 'v0' && jsonData?.extractorOptions?.extractionSchema) {
let schema = jsonData.extractorOptions.extractionSchema;
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
if (schema instanceof z.ZodSchema) {
if (schema instanceof z.ZodSchema || schema instanceof z.ZodObject) {
schema = zodToJsonSchema(schema);
}
if(this.version === 'v0') {
jsonData = {
...jsonData,
extractorOptions: {
...jsonData.extractorOptions,
extractionSchema: schema,
mode: jsonData.extractorOptions.mode || "llm-extraction",
},
};
} else {
jsonData = {
...jsonData,
extract: {
...jsonData.extract,
schema: schema,
},
};
jsonData = {
...jsonData,
extractorOptions: {
...jsonData.extractorOptions,
extractionSchema: schema,
mode: jsonData.extractorOptions.mode || "llm-extraction",
},
};
} else if (this.version === 'v1' && jsonData?.extract?.schema) {
let schema = jsonData.extract.schema;
// Attempt to convert the schema from Zod to JSON Schema; if the conversion throws, the error is ignored and the schema is sent unchanged
try {
schema = zodToJsonSchema(schema);
} catch (error) {
}
jsonData = {
...jsonData,
extract: {
...jsonData.extract,
schema: schema,
},
};
}
try {
const response: AxiosResponse = await axios.post(

View File

@ -69,6 +69,7 @@ export interface FirecrawlDocument {
html?: string;
rawHtml?: string;
links?: string[];
extract?: Record<any, any>;
screenshot?: string;
metadata: FirecrawlDocumentMetadata;
}
@ -97,7 +98,7 @@ export interface FirecrawlDocumentV0 {
* Defines the options and configurations available for scraping web content.
*/
export interface ScrapeParams {
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot")[];
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "extract")[];
headers?: Record<string, string>;
includeTags?: string[];
excludeTags?: string[];
@ -105,6 +106,11 @@ export interface ScrapeParams {
screenshotMode?: "desktop" | "full-desktop" | "mobile" | "full-mobile";
waitFor?: number;
timeout?: number;
extract?: {
prompt?: string;
schema?: z.ZodSchema | any;
systemPrompt?: string;
};
}
/**
* Parameters for scraping operations on v0.