From 25f32000dbff7bc686963794b7be377e55bf8447 Mon Sep 17 00:00:00 2001 From: rafaelmmiller <150964962+rafaelsideguide@users.noreply.github.com> Date: Wed, 13 Nov 2024 13:05:29 -0300 Subject: [PATCH] mvp done? --- apps/api/src/controllers/v1/extract.ts | 14 +++++++------- apps/api/src/lib/extract/completions.ts | 5 ++++- apps/api/src/lib/ranker.ts | 4 ++-- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts index 6f7aced1..d3c06c34 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ b/apps/api/src/controllers/v1/extract.ts @@ -89,7 +89,7 @@ export async function extractController( await redis.set(cacheKey, JSON.stringify(allResults), "EX", 24 * 60 * 60); // Cache for 24 hours } - console.log("allResults", allResults); + // console.log("allResults", allResults); // Parallelize sitemap fetch with serper search const [sitemap, ...searchResults] = await Promise.all([ req.body.ignoreSitemap ? null : crawler.tryGetSitemap(), @@ -138,7 +138,7 @@ export async function extractController( linksAndScores = await performRanking(links, searchQuery); } - console.log("linksAndScores", linksAndScores); + // console.log("linksAndScores", linksAndScores); links = links .map((x) => { @@ -218,8 +218,8 @@ export async function extractController( await getScrapeQueue().remove(jobId); - const endTime = new Date().getTime(); - const timeTakenInSeconds = (endTime - startTime) / 1000; + // const endTime = new Date().getTime(); + // const timeTakenInSeconds = (endTime - startTime) / 1000; // const numTokens = // doc && doc.extract // // ? numTokensFromString(doc.markdown, "gpt-3.5-turbo") @@ -235,7 +235,7 @@ export async function extractController( } - console.log("docs", docs); + // console.log("docs", docs); // {"message":"Missing required parameter: 'response_format.json_schema.schema'.","type":"invalid_request_error","param":"response_format.json_schema.schema","code":"missing_required_parameter"},"code":"missing_required_parameter","param":"response_format.json_schema.schema","type":"invalid_request_error"} const completions = await generateFinalExtraction({ @@ -245,7 +245,7 @@ export async function extractController( schema: req.body.schema, }); - console.log("completions", completions); + // console.log("completions", completions); // if(req.body.extract && req.body.formats.includes("extract")) { // creditsToBeBilled = 5; @@ -317,7 +317,7 @@ export async function extractController( return res.status(200).json({ success: true, - data: {}, // includeMetadata ? mapResults : linksToReturn, + data: completions.content, // includeMetadata ? mapResults : linksToReturn, scrape_id: id, //origin?.includes("website") ? id : undefined, }); } \ No newline at end of file diff --git a/apps/api/src/lib/extract/completions.ts b/apps/api/src/lib/extract/completions.ts index fa75594b..c02b3e31 100644 --- a/apps/api/src/lib/extract/completions.ts +++ b/apps/api/src/lib/extract/completions.ts @@ -77,6 +77,9 @@ export async function generateFinalExtraction({ required: ["items"], additionalProperties: false, }; + } else if (schema) { + schema.additionalProperties = false; + schema.required = Object.keys(schema.properties); } const jsonCompletion = await openai.beta.chat.completions.parse({ @@ -96,7 +99,7 @@ export async function generateFinalExtraction({ type: "json_schema", json_schema: { name: "websiteContent", - schema: schema.shape, + schema: schema, strict: true, }, } diff --git a/apps/api/src/lib/ranker.ts b/apps/api/src/lib/ranker.ts index 6c1646da..7cd39820 100644 --- a/apps/api/src/lib/ranker.ts +++ b/apps/api/src/lib/ranker.ts @@ -49,10 +49,10 @@ async function performRanking(links: string[], searchQuery: string) { const linksAndScores = await Promise.all(links.map(async (link) => { const linkEmbedding = await getEmbedding(link); - console.log("linkEmbedding", linkEmbedding); + // console.log("linkEmbedding", linkEmbedding); // const linkVector = textToVector(searchQuery, link); const score = cosineSimilarity(queryEmbedding, linkEmbedding); - console.log("score", score); + // console.log("score", score); return { link, score }; }));