This commit is contained in:
rafaelmmiller 2024-11-13 18:06:20 -03:00
parent 25f32000db
commit 904c904971
6 changed files with 397 additions and 197 deletions

View File

@ -0,0 +1,151 @@
import request from "supertest";
import dotenv from "dotenv";
import {
FirecrawlCrawlResponse,
FirecrawlCrawlStatusResponse,
FirecrawlScrapeResponse,
} from "../../types";
dotenv.config();
const TEST_URL = "http://127.0.0.1:3002";
describe("E2E Tests for Extract API Routes", () => {
  describe("POST /v1/extract", () => {
    /**
     * Counts how many of the expected substrings appear among the
     * extracted values. Accepts `undefined` values (missing property on
     * the response) so a bad extraction yields a clean assertion failure
     * instead of a TypeError from iterating `undefined`.
     */
    const countMatches = (
      values: string[] | undefined,
      expected: string[],
    ): number => {
      let matches = 0;
      for (const value of values ?? []) {
        for (const needle of expected) {
          if (value.includes(needle)) matches++;
        }
      }
      return matches;
    };

    it.concurrent("should return authors of blog posts on firecrawl.dev", async () => {
      const response = await request(TEST_URL)
        .post("/v1/extract")
        .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
        .set("Content-Type", "application/json")
        .send({
          urls: ["https://firecrawl.dev"],
          prompt: "Who are the authors of the blog posts?",
          schema: {
            type: "object",
            properties: { authors: { type: "array", items: { type: "string" } } },
          },
        });

      expect(response.statusCode).toBe(200);
      expect(response.body).toHaveProperty("data");
      // Fixed: the schema requests "authors"; asserting "founders" here could
      // never pass even when the extraction succeeded.
      expect(response.body.data).toHaveProperty("authors");

      const gotItRight = countMatches(response.body.data?.authors, [
        "Caleb Peffer",
        "Gergő Móricz",
        "Eric Ciarla",
        "Nicolas Camara",
      ]);
      // All four known authors must be found.
      expect(gotItRight).toBeGreaterThan(3);
    }, 60000);

    it.concurrent("should return founders of firecrawl.dev (allowExternalLinks = true)", async () => {
      const response = await request(TEST_URL)
        .post("/v1/extract")
        .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
        .set("Content-Type", "application/json")
        .send({
          urls: ["mendable.ai"],
          prompt: "Who are the founders of the company?",
          allowExternalLinks: true,
          schema: {
            type: "object",
            properties: { founders: { type: "array", items: { type: "string" } } },
          },
        });

      expect(response.statusCode).toBe(200);
      expect(response.body).toHaveProperty("data");
      expect(response.body.data).toHaveProperty("founders");

      const gotItRight = countMatches(response.body.data?.founders, [
        "Caleb",
        "Eric",
        "Nicolas",
      ]);
      expect(gotItRight).toBe(3);
    }, 60000);

    it.concurrent("should return hiring opportunities on firecrawl.dev (allowExternalLinks = true)", async () => {
      const response = await request(TEST_URL)
        .post("/v1/extract")
        .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
        .set("Content-Type", "application/json")
        .send({
          urls: ["https://firecrawl.dev"],
          prompt: "What are they hiring for?",
          allowExternalLinks: true,
          schema: {
            type: "array",
            items: {
              type: "string",
            },
          },
        });

      expect(response.statusCode).toBe(200);
      expect(response.body).toHaveProperty("data");

      // NOTE(review): array schemas appear to come back wrapped as
      // `data.items` — confirm against the extract controller's response shape.
      const gotItRight = countMatches(response.body.data?.items, [
        "Developer Relations Specialist",
        "Web Automation Engineer",
        "Developer Experience Engineer",
        "Developer Support Engineer",
        "Dev Ops Engineer",
        "Founding Web Automation Engineer",
      ]);
      expect(gotItRight).toBeGreaterThan(5);
    }, 60000);

    it.concurrent("should return PCI DSS compliance for Fivetran", async () => {
      const response = await request(TEST_URL)
        .post("/v1/extract")
        .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
        .set("Content-Type", "application/json")
        .send({
          urls: ["fivetran.com"],
          prompt: "Does Fivetran have PCI DSS compliance?",
          allowExternalLinks: true,
          schema: {
            type: "object",
            properties: {
              pciDssCompliance: { type: "boolean" },
            },
          },
        });

      expect(response.statusCode).toBe(200);
      expect(response.body).toHaveProperty("data");
      expect(response.body.data?.pciDssCompliance).toBe(true);
    }, 60000);

    it.concurrent("should return Azure Data Connectors for Fivetran", async () => {
      const response = await request(TEST_URL)
        .post("/v1/extract")
        .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
        .set("Content-Type", "application/json")
        .send({
          urls: ["fivetran.com"],
          prompt: "What are the Azure Data Connectors they offer?",
          schema: {
            type: "array",
            items: {
              type: "object",
              properties: {
                connector: { type: "string" },
                description: { type: "string" },
                supportsCaptureDelete: { type: "boolean" },
              },
            },
          },
        });

      // Fixed: this test previously had every assertion commented out, so it
      // always passed regardless of the response. Assert at least the basic
      // contract. (The commented pciDssCompliance check was a copy-paste
      // leftover from the previous test and has been dropped.)
      expect(response.statusCode).toBe(200);
      expect(response.body).toHaveProperty("data");
    }, 60000);
  });
});

View File

@ -26,22 +26,24 @@ import { waitForJob } from "../../services/queue-jobs";
import { addScrapeJob } from "../../services/queue-jobs";
import { PlanType } from "../../types";
import { getJobPriority } from "../../lib/job-priority";
import { generateFinalExtraction } from "../../lib/extract/completions";
import { generateOpenAICompletions } from "../../scraper/scrapeURL/transformers/llmExtract";
import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist";
configDotenv();
const redis = new Redis(process.env.REDIS_URL!);
const MAX_EXTRACT_LIMIT = 100;
const MAX_RANKING_LIMIT = 3;
const MAX_RANKING_LIMIT = 5;
const SCORE_THRESHOLD = 0.75;
export async function extractController(
req: RequestWithAuth<{}, ExtractResponse, ExtractRequest>,
res: Response<any> //ExtractResponse>
res: Response<ExtractResponse>
) {
req.body = extractRequestSchema.parse(req.body);
const id = crypto.randomUUID();
let links: string[] = req.body.urls;
let links: string[]; //= req.body.urls;
const sc: StoredCrawl = {
originUrl: req.body.urls[0],
@ -59,10 +61,14 @@ export async function extractController(
const crawler = crawlToCrawler(id, sc);
let urlWithoutWww = req.body.urls[0].replace("www.", "");
console.log("urlWithoutWww", urlWithoutWww);
let mapUrl = req.body.prompt
? `"${req.body.prompt}" site:${urlWithoutWww}`
: `site:${req.body.urls[0]}`;
const allowExternalLinks = req.body.allowExternalLinks ?? false;
let mapUrl = req.body.prompt && allowExternalLinks
? `${req.body.prompt} ${urlWithoutWww}`
: req.body.prompt ? `${req.body.prompt} site:${urlWithoutWww}`
: `site:${urlWithoutWww}`;
const resultsPerPage = 100;
const maxPages = Math.ceil(MAX_EXTRACT_LIMIT / resultsPerPage);
@ -84,82 +90,103 @@ export async function extractController(
};
pagePromises = Array.from({ length: maxPages }, (_, i) => fetchPage(i + 1));
allResults = await Promise.all(pagePromises);
allResults = (await Promise.all(pagePromises)).flat();
// console.log("allResults", allResults);
// if allResults is empty, return an error
if (allResults.length === 0) {
return res.status(400).json({
success: false,
error: "No results found",
});
}
await redis.set(cacheKey, JSON.stringify(allResults), "EX", 24 * 60 * 60); // Cache for 24 hours
}
// console.log("allResults", allResults);
// Parallelize sitemap fetch with serper search
const [sitemap, ...searchResults] = await Promise.all([
req.body.ignoreSitemap ? null : crawler.tryGetSitemap(),
...(cachedResult ? [] : pagePromises),
]);
// const [sitemap, ...searchResults] = await Promise.all([
// req.body.ignoreSitemap ? null : null, // crawler.tryGetSitemap(),
// ...(cachedResult ? [] : pagePromises),
// ]);
if (!cachedResult) {
allResults = searchResults;
}
// if (!cachedResult) {
// allResults = searchResults;
// }
if (sitemap !== null) {
sitemap.forEach((x) => {
links.push(x.url);
});
}
links = allResults.map(x => `url: ${x.url}, title: ${x.title}, description: ${x.description}`);
console.log("links", links);
// if (sitemap !== null) {
// sitemap.forEach((x) => {
// links.push(x.url);
// });
// }
let mapResults = allResults
.flat()
.filter((result) => result !== null && result !== undefined);
// let mapResults = allResults
// .flat()
// .filter((result) => result !== null && result !== undefined);
const minumumCutoff = Math.min(MAX_EXTRACT_LIMIT, req.body.limit ?? MAX_EXTRACT_LIMIT);
if (mapResults.length > minumumCutoff) {
mapResults = mapResults.slice(0, minumumCutoff);
}
// const minumumCutoff = Math.min(MAX_EXTRACT_LIMIT, req.body.limit ?? MAX_EXTRACT_LIMIT);
// if (mapResults.length > minumumCutoff) {
// mapResults = mapResults.slice(0, minumumCutoff);
// }
if (mapResults.length > 0) {
if (req.body.prompt) {
// Ensure all map results are first, maintaining their order
links = [
mapResults[0].url,
...mapResults.slice(1).map((x) => x.url),
...links,
];
} else {
mapResults.map((x) => {
links.push(x.url);
});
}
}
// if (mapResults.length > 0) {
// if (req.body.prompt) {
// // Ensure all map results are first, maintaining their order
// links = [
// mapResults[0].url,
// ...mapResults.slice(1).map((x) => x.url),
// ...links,
// ];
// } else {
// mapResults.map((x) => {
// links.push(x.url);
// });
// }
// }
// console.log("mapResults", mapResults);
// console.log("links", links);
let linksAndScores: { link: string; score: number }[] = [];
// Perform cosine similarity between the search query and the list of links
if (req.body.prompt) {
const searchQuery = req.body.prompt.toLowerCase();
const searchQuery = mapUrl; //req.body.prompt.toLowerCase();
linksAndScores = await performRanking(links, searchQuery);
}
console.log("linksAndScores", linksAndScores);
links = linksAndScores
.filter(x => x.score > SCORE_THRESHOLD)
.map(x => x.link.split("url: ")[1].split(",")[0])
.filter(x => !isUrlBlocked(x))
console.log("links:", links.length);
// should we use some sort of llm to determine the best links?
// console.log("linksAndScores", linksAndScores);
links = links
.map((x) => {
try {
return checkAndUpdateURLForMap(x).url.trim();
} catch (_) {
return null;
}
})
.filter((x) => x !== null) as string[];
// links = links
// .map((x) => {
// try {
// return checkAndUpdateURLForMap(x).url.trim();
// } catch (_) {
// return null;
// }
// })
// .filter((x) => x !== null) as string[];
// allows for subdomains to be included
links = links.filter((x) => isSameDomain(x, req.body.urls[0]));
// links = links.filter((x) => isSameDomain(x, req.body.urls[0]));
// if includeSubdomains is false, filter out subdomains
if (!req.body.includeSubdomains) {
links = links.filter((x) => isSameSubdomain(x, req.body.urls[0]));
}
// if (!req.body.includeSubdomains) {
// links = links.filter((x) => isSameSubdomain(x, req.body.urls[0]));
// z}
// remove duplicates that could be due to http/https or www
links = removeDuplicateUrls(links);
// links = removeDuplicateUrls(links);
// get top N links
links = links.slice(0, MAX_RANKING_LIMIT);
@ -170,7 +197,7 @@ export async function extractController(
for (const url of links) {
const origin = req.body.origin || "api";
const timeout = req.body.timeout;
const timeout = req.body.timeout ?? 30000;
const jobId = crypto.randomUUID();
const startTime = new Date().getTime();
@ -196,7 +223,7 @@ export async function extractController(
jobPriority
);
const totalWait = 60000 // (req.body.waitFor ?? 0) + (req.body.actions ?? []).reduce((a,x) => (x.type === "wait" ? x.milliseconds ?? 0 : 0) + a, 0);
const totalWait = 0 //60000 // (req.body.waitFor ?? 0) + (req.body.actions ?? []).reduce((a,x) => (x.type === "wait" ? x.milliseconds ?? 0 : 0) + a, 0);
let doc: Document;
try {
@ -234,18 +261,20 @@ export async function extractController(
docs.push(doc);
}
console.log(docs)
// console.log("docs", docs);
const completions = await generateOpenAICompletions(
logger.child({ method: "extractController/generateOpenAICompletions" }),
{
mode: "llm",
systemPrompt: "Only use the provided content to answer the question.",
prompt: mapUrl,
schema: req.body.schema,
},
docs.map(x => x.markdown).join('\n')
);
// {"message":"Missing required parameter: 'response_format.json_schema.schema'.","type":"invalid_request_error","param":"response_format.json_schema.schema","code":"missing_required_parameter"},"code":"missing_required_parameter","param":"response_format.json_schema.schema","type":"invalid_request_error"}
const completions = await generateFinalExtraction({
pagesContent: docs.map(x => x.markdown).join('\n'),
systemPrompt: '',
prompt: req.body.prompt,
schema: req.body.schema,
});
// console.log("completions", completions);
console.log("completions", completions);
// if(req.body.extract && req.body.formats.includes("extract")) {
// creditsToBeBilled = 5;
@ -315,9 +344,18 @@ export async function extractController(
// scrape_id: result.scrape_id
// };
console.log("completions.extract", completions.extract);
let data: any;
try {
data = JSON.parse(completions.extract);
} catch (e) {
data = completions.extract;
}
return res.status(200).json({
success: true,
data: completions.content, // includeMetadata ? mapResults : linksToReturn,
data: data, // includeMetadata ? mapResults : linksToReturn,
scrape_id: id, //origin?.includes("website") ? id : undefined,
});
}

View File

@ -161,6 +161,7 @@ export const extractV1Options = z.object({
limit: z.number().int().positive().finite().safe().optional(),
ignoreSitemap: z.boolean().default(false),
includeSubdomains: z.boolean().default(true),
allowExternalLinks: z.boolean().default(false),
origin: z.string().optional().default("api"),
timeout: z.number().int().positive().finite().safe().default(60000),
}).strict(strictMessage)
@ -353,7 +354,7 @@ export type ExtractResponse =
| {
success: true;
warning?: string;
data: Document;
data: z.infer<typeof extractRequestSchema>;
scrape_id?: string;
};

View File

@ -1,121 +1,124 @@
import OpenAI from "openai";
import { encoding_for_model } from "@dqbd/tiktoken";
import { TiktokenModel } from "@dqbd/tiktoken";
import { ExtractOptions } from "../../controllers/v1/types";
import { Document } from "../entities";
import { z } from "zod";
// use llmExtract.ts instead
const maxTokens = 32000;
const modifier = 4;
// import OpenAI from "openai";
// import { encoding_for_model } from "@dqbd/tiktoken";
// import { TiktokenModel } from "@dqbd/tiktoken";
// import { ExtractOptions } from "../../controllers/v1/types";
// import { Document } from "../entities";
// import { z } from "zod";
export class LLMRefusalError extends Error {
constructor(refusal: string) {
super("LLM refused to extract the website's content");
this.name = "LLMRefusalError";
}
}
// const maxTokens = 32000;
// const modifier = 4;
interface GenerateCompletionsParams {
systemPrompt?: string;
prompt?: string;
schema?: any;
pagesContent: string;
}
// export class LLMRefusalError extends Error {
// constructor(refusal: string) {
// super("LLM refused to extract the website's content");
// this.name = "LLMRefusalError";
// }
// }
export async function generateBasicCompletion(prompt: string) {
const openai = new OpenAI();
const model: TiktokenModel =
(process.env.MODEL_NAME as TiktokenModel) ?? "gpt-4o-mini";
// interface GenerateCompletionsParams {
// systemPrompt?: string;
// prompt?: string;
// schema?: any;
// pagesContent: string;
// }
const completion = await openai.chat.completions.create({
model,
messages: [{ role: "user", content: prompt }],
});
// export async function generateBasicCompletion(prompt: string) {
// const openai = new OpenAI();
// const model: TiktokenModel =
// (process.env.MODEL_NAME as TiktokenModel) ?? "gpt-4o-mini";
return completion.choices[0].message.content;
}
// const completion = await openai.chat.completions.create({
// model,
// messages: [{ role: "user", content: prompt }],
// });
export async function generateFinalExtraction({
pagesContent,
systemPrompt,
prompt,
schema,
}: GenerateCompletionsParams): Promise<{
content: string;
metadata: { numTokens: number; warning: string };
}> {
const openai = new OpenAI();
const model: TiktokenModel =
(process.env.MODEL_NAME as TiktokenModel) ?? "gpt-4o-mini";
// return completion.choices[0].message.content;
// }
let extractionContent = pagesContent;
let numTokens = 0;
let warning = "";
// export async function generateFinalExtraction({
// pagesContent,
// systemPrompt,
// prompt,
// schema,
// }: GenerateCompletionsParams): Promise<{
// content: string;
// metadata: { numTokens: number; warning: string };
// }> {
// const openai = new OpenAI();
// const model: TiktokenModel =
// (process.env.MODEL_NAME as TiktokenModel) ?? "gpt-4o-mini";
const encoder = encoding_for_model(model);
try {
const tokens = encoder.encode(extractionContent);
numTokens = tokens.length;
} catch (error) {
extractionContent = extractionContent.slice(0, maxTokens * modifier);
warning = `Failed to derive number of LLM tokens the extraction might use -- the input has been automatically trimmed to the maximum number of tokens (${maxTokens}) we support.`;
} finally {
encoder.free();
}
// let extractionContent = pagesContent;
// let numTokens = 0;
// let warning = "";
if (numTokens > maxTokens) {
extractionContent = extractionContent.slice(0, maxTokens * modifier);
warning = `The extraction content would have used more tokens (${numTokens}) than the maximum we allow (${maxTokens}). -- the input has been automatically trimmed.`;
}
// const encoder = encoding_for_model(model);
// try {
// const tokens = encoder.encode(extractionContent);
// numTokens = tokens.length;
// } catch (error) {
// extractionContent = extractionContent.slice(0, maxTokens * modifier);
// warning = `Failed to derive number of LLM tokens the extraction might use -- the input has been automatically trimmed to the maximum number of tokens (${maxTokens}) we support.`;
// } finally {
// encoder.free();
// }
if (schema && (schema.type === "array" || schema._type === "ZodArray")) {
schema = {
type: "object",
properties: {
items: schema,
},
required: ["items"],
additionalProperties: false,
};
} else if (schema) {
schema.additionalProperties = false;
schema.required = Object.keys(schema.properties);
}
// if (numTokens > maxTokens) {
// extractionContent = extractionContent.slice(0, maxTokens * modifier);
// warning = `The extraction content would have used more tokens (${numTokens}) than the maximum we allow (${maxTokens}). -- the input has been automatically trimmed.`;
// }
const jsonCompletion = await openai.beta.chat.completions.parse({
model,
messages: [
{ role: "system", content: systemPrompt ?? "" },
{ role: "user", content: [{ type: "text", text: extractionContent }] },
{
role: "user",
content: prompt
? `Transform the above content into structured JSON output based on the following user request: ${prompt}`
: "Transform the above content into structured JSON output.",
},
],
response_format: schema
? {
type: "json_schema",
json_schema: {
name: "websiteContent",
schema: schema,
strict: true,
},
}
: { type: "json_object" },
});
// if (schema && (schema.type === "array" || schema._type === "ZodArray")) {
// schema = {
// type: "object",
// properties: {
// items: schema,
// },
// required: ["items"],
// additionalProperties: false,
// };
// } else if (schema) {
// schema.additionalProperties = false;
// schema.required = Object.keys(schema.properties);
// }
if (jsonCompletion.choices[0].message.refusal !== null) {
throw new LLMRefusalError(jsonCompletion.choices[0].message.refusal);
}
// const jsonCompletion = await openai.beta.chat.completions.parse({
// temperature: 0,
// model,
// messages: [
// { role: "system", content: systemPrompt ?? "" },
// { role: "user", content: [{ type: "text", text: extractionContent }] },
// {
// role: "user",
// content: prompt
// ? `Transform the above content into structured JSON output based on the following user request: ${prompt}`
// : "Transform the above content into structured JSON output.",
// },
// ],
// response_format: schema
// ? {
// type: "json_schema",
// json_schema: {
// name: "websiteContent",
// schema: schema,
// strict: true,
// },
// }
// : { type: "json_object" },
// });
const extraction = jsonCompletion.choices[0].message.parsed;
return {
content: extraction ?? "",
metadata: {
numTokens,
warning,
},
};
}
// if (jsonCompletion.choices[0].message.refusal !== null) {
// throw new LLMRefusalError(jsonCompletion.choices[0].message.refusal);
// }
// const extraction = jsonCompletion.choices[0].message.parsed;
// return {
// content: extraction ?? "",
// metadata: {
// numTokens,
// warning,
// },
// };
// }

View File

@ -58,32 +58,33 @@ function normalizeSchema(x: any): any {
}
}
async function generateOpenAICompletions(logger: Logger, document: Document, options: ExtractOptions): Promise<Document> {
export async function generateOpenAICompletions(logger: Logger, options: ExtractOptions, markdown?: string, previousWarning?: string): Promise<{ extract: any, warning: string | undefined }> {
let extract: any;
let warning: string | undefined;
const openai = new OpenAI();
const model: TiktokenModel = (process.env.MODEL_NAME as TiktokenModel) ?? "gpt-4o-mini";
if (document.markdown === undefined) {
if (markdown === undefined) {
throw new Error("document.markdown is undefined -- this is unexpected");
}
let extractionContent = document.markdown;
// count number of tokens
let numTokens = 0;
const encoder = encoding_for_model(model as TiktokenModel);
try {
// Encode the message into tokens
const tokens = encoder.encode(extractionContent);
const tokens = encoder.encode(markdown);
// Return the number of tokens
numTokens = tokens.length;
} catch (error) {
logger.warn("Calculating num tokens of string failed", { error, extractionContent });
logger.warn("Calculating num tokens of string failed", { error, markdown });
extractionContent = extractionContent.slice(0, maxTokens * modifier);
markdown = markdown.slice(0, maxTokens * modifier);
const warning = "Failed to derive number of LLM tokens the extraction might use -- the input has been automatically trimmed to the maximum number of tokens (" + maxTokens + ") we support.";
document.warning = document.warning === undefined ? warning : " " + warning;
let w = "Failed to derive number of LLM tokens the extraction might use -- the input has been automatically trimmed to the maximum number of tokens (" + maxTokens + ") we support.";
warning = previousWarning === undefined ? w : w + " " + previousWarning;
} finally {
// Free the encoder resources after use
encoder.free();
@ -91,10 +92,10 @@ async function generateOpenAICompletions(logger: Logger, document: Document, opt
if (numTokens > maxTokens) {
// trim the document to the maximum number of tokens, tokens != characters
extractionContent = extractionContent.slice(0, maxTokens * modifier);
markdown = markdown.slice(0, maxTokens * modifier);
const warning = "The extraction content would have used more tokens (" + numTokens + ") than the maximum we allow (" + maxTokens + "). -- the input has been automatically trimmed.";
document.warning = document.warning === undefined ? warning : " " + warning;
const w = "The extraction content would have used more tokens (" + numTokens + ") than the maximum we allow (" + maxTokens + "). -- the input has been automatically trimmed.";
warning = previousWarning === undefined ? w : w + " " + previousWarning;
}
let schema = options.schema;
@ -120,7 +121,7 @@ async function generateOpenAICompletions(logger: Logger, document: Document, opt
},
{
role: "user",
content: [{ type: "text", text: extractionContent }],
content: [{ type: "text", text: markdown }],
},
{
role: "user",
@ -143,11 +144,11 @@ async function generateOpenAICompletions(logger: Logger, document: Document, opt
throw new LLMRefusalError(jsonCompletion.choices[0].message.refusal);
}
document.extract = jsonCompletion.choices[0].message.parsed;
extract = jsonCompletion.choices[0].message.parsed;
if (document.extract === null && jsonCompletion.choices[0].message.content !== null) {
if (extract === null && jsonCompletion.choices[0].message.content !== null) {
try {
document.extract = JSON.parse(jsonCompletion.choices[0].message.content);
extract = JSON.parse(jsonCompletion.choices[0].message.content);
} catch (e) {
logger.error("Failed to parse returned JSON, no schema specified.", { error: e });
throw new LLMRefusalError("Failed to parse returned JSON. Please specify a schema in the extract object.");
@ -155,14 +156,21 @@ async function generateOpenAICompletions(logger: Logger, document: Document, opt
}
if (options.schema && options.schema.type === "array") {
document.extract = document.extract?.items;
extract = extract?.items;
}
return document;
return { extract, warning };
}
export async function performLLMExtract(meta: Meta, document: Document): Promise<Document> {
if (meta.options.formats.includes("extract")) {
document = await generateOpenAICompletions(meta.logger.child({ method: "performLLMExtract/generateOpenAICompletions" }), document, meta.options.extract!);
const { extract, warning } = await generateOpenAICompletions(
meta.logger.child({ method: "performLLMExtract/generateOpenAICompletions" }),
meta.options.extract!,
document.markdown,
document.warning,
);
document.extract = extract;
document.warning = warning;
}
return document;

View File

@ -37,7 +37,6 @@ export async function fireEngineMap(
);
return [];
}
console.log("process.env.FIRE_ENGINE_BETA_URL", process.env.FIRE_ENGINE_BETA_URL);
const response = await fetch(`${process.env.FIRE_ENGINE_BETA_URL}/search`, {
method: "POST",