This commit is contained in:
Nicolas 2024-11-14 15:34:02 -05:00
parent ebe9de2ac5
commit 22848af5ae
2 changed files with 9 additions and 8 deletions

View File

@ -35,6 +35,9 @@ describe("E2E Tests for Extract API Routes", () => {
if (author.includes("Gergő Móricz")) gotItRight++; if (author.includes("Gergő Móricz")) gotItRight++;
if (author.includes("Eric Ciarla")) gotItRight++; if (author.includes("Eric Ciarla")) gotItRight++;
if (author.includes("Nicolas Camara")) gotItRight++; if (author.includes("Nicolas Camara")) gotItRight++;
if (author.includes("Jon")) gotItRight++;
if (author.includes("Wendong")) gotItRight++;
} }
expect(gotItRight).toBeGreaterThan(1); expect(gotItRight).toBeGreaterThan(1);
@ -46,7 +49,7 @@ describe("E2E Tests for Extract API Routes", () => {
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json") .set("Content-Type", "application/json")
.send({ .send({
urls: ["mendable.ai/*"], urls: ["firecrawl.dev/*"],
prompt: "Who are the founders of the company?", prompt: "Who are the founders of the company?",
allowExternalLinks: true, allowExternalLinks: true,
schema: { schema: {
@ -58,14 +61,16 @@ describe("E2E Tests for Extract API Routes", () => {
expect(response.body).toHaveProperty("data"); expect(response.body).toHaveProperty("data");
expect(response.body.data).toHaveProperty("founders"); expect(response.body.data).toHaveProperty("founders");
console.log(response.body.data?.founders);
let gotItRight = 0; let gotItRight = 0;
for (const founder of response.body.data?.founders) { for (const founder of response.body.data?.founders) {
if (founder.includes("Caleb")) gotItRight++; if (founder.includes("Caleb")) gotItRight++;
if (founder.includes("Eric")) gotItRight++; if (founder.includes("Eric")) gotItRight++;
if (founder.includes("Nicolas")) gotItRight++; if (founder.includes("Nicolas")) gotItRight++;
} }
expect(gotItRight).toBe(3); expect(gotItRight).toBeGreaterThanOrEqual(2);
}, 60000); }, 60000);
it.concurrent("should return hiring opportunities on firecrawl.dev (allowExternalLinks = true)", async () => { it.concurrent("should return hiring opportunities on firecrawl.dev (allowExternalLinks = true)", async () => {
@ -90,15 +95,12 @@ describe("E2E Tests for Extract API Routes", () => {
let gotItRight = 0; let gotItRight = 0;
for (const hiring of response.body.data?.items) { for (const hiring of response.body.data?.items) {
if (hiring.includes("Developer Relations Specialist")) gotItRight++;
if (hiring.includes("Web Automation Engineer")) gotItRight++;
if (hiring.includes("Developer Experience Engineer")) gotItRight++;
if (hiring.includes("Developer Support Engineer")) gotItRight++; if (hiring.includes("Developer Support Engineer")) gotItRight++;
if (hiring.includes("Dev Ops Engineer")) gotItRight++; if (hiring.includes("Dev Ops Engineer")) gotItRight++;
if (hiring.includes("Founding Web Automation Engineer")) gotItRight++; if (hiring.includes("Founding Web Automation Engineer")) gotItRight++;
} }
expect(gotItRight).toBeGreaterThan(5); expect(gotItRight).toBeGreaterThan(2);
}, 60000); }, 60000);
it.concurrent("should return PCI DSS compliance for Fivetran", async () => { it.concurrent("should return PCI DSS compliance for Fivetran", async () => {

View File

@ -29,7 +29,7 @@ const redis = new Redis(process.env.REDIS_URL!);
const MAX_EXTRACT_LIMIT = 100; const MAX_EXTRACT_LIMIT = 100;
const MAX_RANKING_LIMIT = 10; const MAX_RANKING_LIMIT = 10;
const SCORE_THRESHOLD = 0.70; const SCORE_THRESHOLD = 0.75;
export async function extractController( export async function extractController(
req: RequestWithAuth<{}, ExtractResponse, ExtractRequest>, req: RequestWithAuth<{}, ExtractResponse, ExtractRequest>,
@ -107,7 +107,6 @@ export async function extractController(
links.push(...processedUrls.flat()); links.push(...processedUrls.flat());
console.log("links", links.length); console.log("links", links.length);
console
// Scrape all links in parallel // Scrape all links in parallel
const scrapePromises = links.map(async (url) => { const scrapePromises = links.map(async (url) => {
const origin = req.body.origin || "api"; const origin = req.body.origin || "api";