mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 11:42:24 +08:00
mvp done?
This commit is contained in:
parent
a175c1513a
commit
25f32000db
|
@ -89,7 +89,7 @@ export async function extractController(
|
||||||
await redis.set(cacheKey, JSON.stringify(allResults), "EX", 24 * 60 * 60); // Cache for 24 hours
|
await redis.set(cacheKey, JSON.stringify(allResults), "EX", 24 * 60 * 60); // Cache for 24 hours
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log("allResults", allResults);
|
// console.log("allResults", allResults);
|
||||||
// Parallelize sitemap fetch with serper search
|
// Parallelize sitemap fetch with serper search
|
||||||
const [sitemap, ...searchResults] = await Promise.all([
|
const [sitemap, ...searchResults] = await Promise.all([
|
||||||
req.body.ignoreSitemap ? null : crawler.tryGetSitemap(),
|
req.body.ignoreSitemap ? null : crawler.tryGetSitemap(),
|
||||||
|
@ -138,7 +138,7 @@ export async function extractController(
|
||||||
linksAndScores = await performRanking(links, searchQuery);
|
linksAndScores = await performRanking(links, searchQuery);
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log("linksAndScores", linksAndScores);
|
// console.log("linksAndScores", linksAndScores);
|
||||||
|
|
||||||
links = links
|
links = links
|
||||||
.map((x) => {
|
.map((x) => {
|
||||||
|
@ -218,8 +218,8 @@ export async function extractController(
|
||||||
|
|
||||||
await getScrapeQueue().remove(jobId);
|
await getScrapeQueue().remove(jobId);
|
||||||
|
|
||||||
const endTime = new Date().getTime();
|
// const endTime = new Date().getTime();
|
||||||
const timeTakenInSeconds = (endTime - startTime) / 1000;
|
// const timeTakenInSeconds = (endTime - startTime) / 1000;
|
||||||
// const numTokens =
|
// const numTokens =
|
||||||
// doc && doc.extract
|
// doc && doc.extract
|
||||||
// // ? numTokensFromString(doc.markdown, "gpt-3.5-turbo")
|
// // ? numTokensFromString(doc.markdown, "gpt-3.5-turbo")
|
||||||
|
@ -235,7 +235,7 @@ export async function extractController(
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
console.log("docs", docs);
|
// console.log("docs", docs);
|
||||||
|
|
||||||
// {"message":"Missing required parameter: 'response_format.json_schema.schema'.","type":"invalid_request_error","param":"response_format.json_schema.schema","code":"missing_required_parameter"},"code":"missing_required_parameter","param":"response_format.json_schema.schema","type":"invalid_request_error"}
|
// {"message":"Missing required parameter: 'response_format.json_schema.schema'.","type":"invalid_request_error","param":"response_format.json_schema.schema","code":"missing_required_parameter"},"code":"missing_required_parameter","param":"response_format.json_schema.schema","type":"invalid_request_error"}
|
||||||
const completions = await generateFinalExtraction({
|
const completions = await generateFinalExtraction({
|
||||||
|
@ -245,7 +245,7 @@ export async function extractController(
|
||||||
schema: req.body.schema,
|
schema: req.body.schema,
|
||||||
});
|
});
|
||||||
|
|
||||||
console.log("completions", completions);
|
// console.log("completions", completions);
|
||||||
|
|
||||||
// if(req.body.extract && req.body.formats.includes("extract")) {
|
// if(req.body.extract && req.body.formats.includes("extract")) {
|
||||||
// creditsToBeBilled = 5;
|
// creditsToBeBilled = 5;
|
||||||
|
@ -317,7 +317,7 @@ export async function extractController(
|
||||||
|
|
||||||
return res.status(200).json({
|
return res.status(200).json({
|
||||||
success: true,
|
success: true,
|
||||||
data: {}, // includeMetadata ? mapResults : linksToReturn,
|
data: completions.content, // includeMetadata ? mapResults : linksToReturn,
|
||||||
scrape_id: id, //origin?.includes("website") ? id : undefined,
|
scrape_id: id, //origin?.includes("website") ? id : undefined,
|
||||||
});
|
});
|
||||||
}
|
}
|
|
@ -77,6 +77,9 @@ export async function generateFinalExtraction({
|
||||||
required: ["items"],
|
required: ["items"],
|
||||||
additionalProperties: false,
|
additionalProperties: false,
|
||||||
};
|
};
|
||||||
|
} else if (schema) {
|
||||||
|
schema.additionalProperties = false;
|
||||||
|
schema.required = Object.keys(schema.properties);
|
||||||
}
|
}
|
||||||
|
|
||||||
const jsonCompletion = await openai.beta.chat.completions.parse({
|
const jsonCompletion = await openai.beta.chat.completions.parse({
|
||||||
|
@ -96,7 +99,7 @@ export async function generateFinalExtraction({
|
||||||
type: "json_schema",
|
type: "json_schema",
|
||||||
json_schema: {
|
json_schema: {
|
||||||
name: "websiteContent",
|
name: "websiteContent",
|
||||||
schema: schema.shape,
|
schema: schema,
|
||||||
strict: true,
|
strict: true,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
|
@ -49,10 +49,10 @@ async function performRanking(links: string[], searchQuery: string) {
|
||||||
const linksAndScores = await Promise.all(links.map(async (link) => {
|
const linksAndScores = await Promise.all(links.map(async (link) => {
|
||||||
const linkEmbedding = await getEmbedding(link);
|
const linkEmbedding = await getEmbedding(link);
|
||||||
|
|
||||||
console.log("linkEmbedding", linkEmbedding);
|
// console.log("linkEmbedding", linkEmbedding);
|
||||||
// const linkVector = textToVector(searchQuery, link);
|
// const linkVector = textToVector(searchQuery, link);
|
||||||
const score = cosineSimilarity(queryEmbedding, linkEmbedding);
|
const score = cosineSimilarity(queryEmbedding, linkEmbedding);
|
||||||
console.log("score", score);
|
// console.log("score", score);
|
||||||
return { link, score };
|
return { link, score };
|
||||||
}));
|
}));
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user