mvp done?

This commit is contained in:
rafaelmmiller 2024-11-13 13:05:29 -03:00
parent a175c1513a
commit 25f32000db
3 changed files with 13 additions and 10 deletions

View File

@@ -89,7 +89,7 @@ export async function extractController(
await redis.set(cacheKey, JSON.stringify(allResults), "EX", 24 * 60 * 60); // Cache for 24 hours
}
console.log("allResults", allResults);
// console.log("allResults", allResults);
// Parallelize sitemap fetch with serper search
const [sitemap, ...searchResults] = await Promise.all([
req.body.ignoreSitemap ? null : crawler.tryGetSitemap(),
@@ -138,7 +138,7 @@ export async function extractController(
linksAndScores = await performRanking(links, searchQuery);
}
console.log("linksAndScores", linksAndScores);
// console.log("linksAndScores", linksAndScores);
links = links
.map((x) => {
@@ -218,8 +218,8 @@ export async function extractController(
await getScrapeQueue().remove(jobId);
const endTime = new Date().getTime();
const timeTakenInSeconds = (endTime - startTime) / 1000;
// const endTime = new Date().getTime();
// const timeTakenInSeconds = (endTime - startTime) / 1000;
// const numTokens =
// doc && doc.extract
// // ? numTokensFromString(doc.markdown, "gpt-3.5-turbo")
@@ -235,7 +235,7 @@ export async function extractController(
}
console.log("docs", docs);
// console.log("docs", docs);
// {"message":"Missing required parameter: 'response_format.json_schema.schema'.","type":"invalid_request_error","param":"response_format.json_schema.schema","code":"missing_required_parameter"},"code":"missing_required_parameter","param":"response_format.json_schema.schema","type":"invalid_request_error"}
const completions = await generateFinalExtraction({
@@ -245,7 +245,7 @@ export async function extractController(
schema: req.body.schema,
});
console.log("completions", completions);
// console.log("completions", completions);
// if(req.body.extract && req.body.formats.includes("extract")) {
// creditsToBeBilled = 5;
@@ -317,7 +317,7 @@ export async function extractController(
return res.status(200).json({
success: true,
data: {}, // includeMetadata ? mapResults : linksToReturn,
data: completions.content, // includeMetadata ? mapResults : linksToReturn,
scrape_id: id, //origin?.includes("website") ? id : undefined,
});
}

View File

@@ -77,6 +77,9 @@ export async function generateFinalExtraction({
required: ["items"],
additionalProperties: false,
};
} else if (schema) {
schema.additionalProperties = false;
schema.required = Object.keys(schema.properties);
}
const jsonCompletion = await openai.beta.chat.completions.parse({
@@ -96,7 +99,7 @@ export async function generateFinalExtraction({
type: "json_schema",
json_schema: {
name: "websiteContent",
schema: schema.shape,
schema: schema,
strict: true,
},
}

View File

@@ -49,10 +49,10 @@ async function performRanking(links: string[], searchQuery: string) {
const linksAndScores = await Promise.all(links.map(async (link) => {
const linkEmbedding = await getEmbedding(link);
console.log("linkEmbedding", linkEmbedding);
// console.log("linkEmbedding", linkEmbedding);
// const linkVector = textToVector(searchQuery, link);
const score = cosineSimilarity(queryEmbedding, linkEmbedding);
console.log("score", score);
// console.log("score", score);
return { link, score };
}));