MVP done?

This commit is contained in:
rafaelmmiller 2024-11-13 13:05:29 -03:00
parent a175c1513a
commit 25f32000db
3 changed files with 13 additions and 10 deletions

View File

@ -89,7 +89,7 @@ export async function extractController(
await redis.set(cacheKey, JSON.stringify(allResults), "EX", 24 * 60 * 60); // Cache for 24 hours await redis.set(cacheKey, JSON.stringify(allResults), "EX", 24 * 60 * 60); // Cache for 24 hours
} }
console.log("allResults", allResults); // console.log("allResults", allResults);
// Parallelize sitemap fetch with serper search // Parallelize sitemap fetch with serper search
const [sitemap, ...searchResults] = await Promise.all([ const [sitemap, ...searchResults] = await Promise.all([
req.body.ignoreSitemap ? null : crawler.tryGetSitemap(), req.body.ignoreSitemap ? null : crawler.tryGetSitemap(),
@ -138,7 +138,7 @@ export async function extractController(
linksAndScores = await performRanking(links, searchQuery); linksAndScores = await performRanking(links, searchQuery);
} }
console.log("linksAndScores", linksAndScores); // console.log("linksAndScores", linksAndScores);
links = links links = links
.map((x) => { .map((x) => {
@ -218,8 +218,8 @@ export async function extractController(
await getScrapeQueue().remove(jobId); await getScrapeQueue().remove(jobId);
const endTime = new Date().getTime(); // const endTime = new Date().getTime();
const timeTakenInSeconds = (endTime - startTime) / 1000; // const timeTakenInSeconds = (endTime - startTime) / 1000;
// const numTokens = // const numTokens =
// doc && doc.extract // doc && doc.extract
// // ? numTokensFromString(doc.markdown, "gpt-3.5-turbo") // // ? numTokensFromString(doc.markdown, "gpt-3.5-turbo")
@ -235,7 +235,7 @@ export async function extractController(
} }
console.log("docs", docs); // console.log("docs", docs);
// {"message":"Missing required parameter: 'response_format.json_schema.schema'.","type":"invalid_request_error","param":"response_format.json_schema.schema","code":"missing_required_parameter"},"code":"missing_required_parameter","param":"response_format.json_schema.schema","type":"invalid_request_error"} // {"message":"Missing required parameter: 'response_format.json_schema.schema'.","type":"invalid_request_error","param":"response_format.json_schema.schema","code":"missing_required_parameter"},"code":"missing_required_parameter","param":"response_format.json_schema.schema","type":"invalid_request_error"}
const completions = await generateFinalExtraction({ const completions = await generateFinalExtraction({
@ -245,7 +245,7 @@ export async function extractController(
schema: req.body.schema, schema: req.body.schema,
}); });
console.log("completions", completions); // console.log("completions", completions);
// if(req.body.extract && req.body.formats.includes("extract")) { // if(req.body.extract && req.body.formats.includes("extract")) {
// creditsToBeBilled = 5; // creditsToBeBilled = 5;
@ -317,7 +317,7 @@ export async function extractController(
return res.status(200).json({ return res.status(200).json({
success: true, success: true,
data: {}, // includeMetadata ? mapResults : linksToReturn, data: completions.content, // includeMetadata ? mapResults : linksToReturn,
scrape_id: id, //origin?.includes("website") ? id : undefined, scrape_id: id, //origin?.includes("website") ? id : undefined,
}); });
} }

View File

@ -77,6 +77,9 @@ export async function generateFinalExtraction({
required: ["items"], required: ["items"],
additionalProperties: false, additionalProperties: false,
}; };
} else if (schema) {
schema.additionalProperties = false;
schema.required = Object.keys(schema.properties);
} }
const jsonCompletion = await openai.beta.chat.completions.parse({ const jsonCompletion = await openai.beta.chat.completions.parse({
@ -96,7 +99,7 @@ export async function generateFinalExtraction({
type: "json_schema", type: "json_schema",
json_schema: { json_schema: {
name: "websiteContent", name: "websiteContent",
schema: schema.shape, schema: schema,
strict: true, strict: true,
}, },
} }

View File

@ -49,10 +49,10 @@ async function performRanking(links: string[], searchQuery: string) {
const linksAndScores = await Promise.all(links.map(async (link) => { const linksAndScores = await Promise.all(links.map(async (link) => {
const linkEmbedding = await getEmbedding(link); const linkEmbedding = await getEmbedding(link);
console.log("linkEmbedding", linkEmbedding); // console.log("linkEmbedding", linkEmbedding);
// const linkVector = textToVector(searchQuery, link); // const linkVector = textToVector(searchQuery, link);
const score = cosineSimilarity(queryEmbedding, linkEmbedding); const score = cosineSimilarity(queryEmbedding, linkEmbedding);
console.log("score", score); // console.log("score", score);
return { link, score }; return { link, score };
})); }));