mirror of https://github.com/mendableai/firecrawl.git
synced 2024-11-16 11:42:24 +08:00

commit ad58bc2820 (parent 056b0ec24d)
Nick: test suite init
.gitignore (vendored, 4 lines added)

@@ -10,3 +10,7 @@ apps/js-sdk/node_modules/
 apps/api/.env.local
 
 apps/test-suite/node_modules/
+
+apps/test-suite/.env
+
+apps/test-suite/logs

@@ -1,3 +1,4 @@
 OPENAI_API_KEY=
 TEST_API_KEY=
 TEST_URL=http://localhost:3002
+ANTHROPIC_API_KEY=
apps/test-suite/data/websites.json (new file, 57 lines)

[
  {
    "website":"https://www.anthropic.com/claude",
    "prompt":"Does this website contain pricing information?",
    "expected_output":"yes"
  },
  {
    "website":"https://mendable.ai/pricing",
    "prompt":"Does this website contain pricing information?",
    "expected_output":"yes"
  },
  {
    "website":"https://openai.com/news",
    "prompt":"Does this website contain a list of research news?",
    "expected_output":"yes"
  },
  {
    "website":"https://agentops.ai",
    "prompt":"Does this website contain a code snippets?",
    "expected_output":"yes"
  },
  {
    "website":"https://ycombinator.com/companies",
    "prompt":"Does this website contain a list bigger than 5 of ycombinator companies?",
    "expected_output":"yes"
  },
  {
    "website":"https://firecrawl.dev",
    "prompt":"Does this website contain a list bigger than 5 of ycombinator companies?",
    "expected_output":"yes"
  },
  {
    "website":"https://en.wikipedia.org/wiki/T._N._Seshan",
    "prompt":"Does this website talk about Seshan's career?",
    "expected_output":"yes"
  },
  {
    "website":"https://mendable.ai/blog",
    "prompt":"Does this website contain multiple blog articles?",
    "expected_output":"yes"
  },
  {
    "website":"https://mendable.ai/blog",
    "prompt":"Does this website contain multiple blog articles?",
    "expected_output":"yes"
  },
  {
    "website":"https://news.ycombinator.com/",
    "prompt":"Does this website contain a list of articles in a table markdown format?",
    "expected_output":"yes"
  },
  {
    "website":"https://www.vellum.ai/llm-leaderboard",
    "prompt":"Does this website contain a model comparison table?",
    "expected_output":"yes"
  }
]
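Each record pairs a URL with a yes/no question that the suite asks an LLM about the scraped markdown. A minimal sketch of how one record is consumed (the WebsiteData shape comes from the test file below; the surrounding code here is illustrative, not part of the commit):

const websitesData = require("./data/websites.json");

interface WebsiteData {
  website: string;
  prompt: string;
  expected_output: string;
}

// Build the kind of yes/no question the suite sends for the first record.
const record: WebsiteData = websitesData[0];
const question = `${record.prompt} Just say 'yes' or 'no' to the question.`;
console.log(question, "->", record.expected_output);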
@@ -1,12 +1,23 @@
 import request from "supertest";
 import dotenv from "dotenv";
-import { OpenAI } from "openai";
-import path from "path";
-import playwright from "playwright";
-const fs = require('fs').promises;
+import Anthropic from "@anthropic-ai/sdk";
+import { numTokensFromString } from "./utils/tokens";
+import OpenAI from "openai";
+import { WebsiteScrapeError } from "./utils/types";
+import { logErrors } from "./utils/log";
+const websitesData = require("./data/websites.json");
+import "dotenv/config";
+
+const fs = require('fs');
 
 dotenv.config();
 
+interface WebsiteData {
+  website: string;
+  prompt: string;
+  expected_output: string;
+}
+
 describe("Scraping/Crawling Checkup (E2E)", () => {
   beforeAll(() => {
     if (!process.env.TEST_API_KEY) {

@@ -20,195 +31,128 @@ describe("Scraping/Crawling Checkup (E2E)", () => {
     }
   });
 
-  // restore original process.env
-  afterAll(() => {
-    // process.env = originalEnv;
-  });
-
-  describe("Scraping static websites", () => {
-    it("should scrape the content of 5 static websites", async () => {
-      const urls = [
-        'https://www.mendable.ai/blog/coachgtm-mongodb',
-        'https://www.mendable.ai/blog/building-safe-rag',
-        'https://www.mendable.ai/blog/gdpr-repository-pattern',
-        'https://www.mendable.ai/blog/how-mendable-leverages-langsmith-to-debug-tools-and-actions',
-        'https://www.mendable.ai/blog/european-data-storage'
-      ];
-      const expectedContent = [
-        "CoachGTM, a Mendable AI Slack bot powered by MongoDB Atlas Vector Search, equips MongoDB’s teams with the knowledge and expertise they need to engage with customers meaningfully, reducing the risk of churn and fostering lasting relationships.",
-        "You should consider security if you’re building LLM (Large Language Models) systems for enterprise. Over 67% percent of enterprise CEOs report a lack of trust in AI. An LLM system must protect sensitive data and refuse to take dangerous actions or it can’t be deployed in an enterprise.",
-        "The biggest obstacle we encountered was breaking the strong dependency on a specific database throughout all our functions. This required weeks of diligent effort from our teams. Despite the hurdles, we remained committed to pushing forward, fixing bugs, and ultimately reaching our goal.",
-        "It is no secret that 2024 will be the year we start seeing more LLMs baked into our workflows. This means that the way we interact with LLM models will be less just Question and Answer and more action-based.",
-        "A major request from many of our enterprise customers has been the option for data storage in Europe. Although our existing Data Processing Agreement (DPA) with our current provider met the needs of many customers, the location of our data storage led to some potential clients choosing to wait until we had European storage."
-      ]
-
-      const responses = await Promise.all(urls.map(url =>
-        request(process.env.TEST_URL || '')
-          .post("/v0/scrape")
-          .set("Content-Type", "application/json")
-          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
-          .send({ url })
-      ));
-
-      for (const response of responses) {
-        expect(response.statusCode).toBe(200);
-        expect(response.body.data).toHaveProperty("content");
-        expect(response.body.data).toHaveProperty("markdown");
-        expect(response.body.data).toHaveProperty("metadata");
-        expect(response.body.data.content).toContain(expectedContent[responses.indexOf(response)]);
-      }
-    }, 15000); // 15 seconds timeout
-  })
-
-  describe("Crawling hacker news dynamic websites", () => {
-    it("should return crawl hacker news, retrieve {numberOfPages} pages, get using firecrawl vs LLM Vision and successfully compare both", async () => {
-      const numberOfPages = 100;
-
-      const hackerNewsScrape = await request(process.env.TEST_URL || '')
-        .post("/v0/scrape")
-        .set("Content-Type", "application/json")
-        .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
-        .send({ url: "https://news.ycombinator.com/" });
-
-      const scrapeUrls = [...await getRandomLinksFromContent({
-        content: hackerNewsScrape.body.data.markdown,
-        excludes: ['ycombinator.com', '.pdf'],
-        limit: numberOfPages
-      })];
-
-      const fireCrawlResponses = await Promise.all(scrapeUrls.map(url =>
-        request(process.env.TEST_URL || '')
-          .post("/v0/scrape")
-          .set("Content-Type", "application/json")
-          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
-          .send({ url })
-      ));
-
-      const visionResponses = await Promise.all(scrapeUrls.map(url => {
-        return getPageContentByScreenshot(url);
-      }));
-
-      let successCount = 0;
-      const fireCrawlContents = fireCrawlResponses.map(response => response.body?.data?.content ? response.body.data.content : '');
-      for (let i = 0; i < scrapeUrls.length; i++) {
-        if (fuzzyContains({
-          largeText: fireCrawlContents[i],
-          queryText: visionResponses[i],
-          threshold: 0.8
-        })) {
-          successCount += 1;
-        } else {
-          console.log(`Failed to match content for ${scrapeUrls[i]}`);
-          console.log(`Firecrawl: ${fireCrawlContents[i]}`);
-          console.log(`Vision: ${visionResponses[i]}`);
-        }
-      }
-
-      expect(successCount / scrapeUrls.length).toBeGreaterThanOrEqual(0.9);
-    }, 120000); // 120 seconds
-  });
-});
-
-const getImageDescription = async (
-  imagePath: string
-): Promise<string> => {
-  try {
-    const prompt = `
-    Get a part of the written content inside the website.
-    We are going to compare if the content we retrieve contains the content of the screenshot.
-    Use an easy verifiable content with close to 150 characters.
-    Answer using this template: 'Content: [CONTENT]'
-    `
-
-    if (!process.env.OPENAI_API_KEY) {
-      throw new Error("No OpenAI API key provided");
-    }
-    // const imageMediaType = 'image/png';
-    const imageBuffer = await fs.readFile(imagePath);
-    const imageData = imageBuffer.toString('base64');
-
-    const openai = new OpenAI();
-
-    const response = await openai.chat.completions.create({
-      model: "gpt-4-turbo",
-      messages: [
-        {
-          role: "user",
-          content: [
-            {
-              type: "text",
-              text: prompt,
-            },
-            {
-              type: "image_url",
-              image_url: {
-                "url": "data:image/png;base64," + imageData
-              }
-            },
-          ],
-        },
-      ],
-    });
-
-    return response.choices[0].message.content?.replace("Content: ", "") || '';
-  } catch (error) {
-    // console.error("Error generating content from screenshot:", error);
-    return '';
-  }
-}
-
-const getPageContentByScreenshot = async (url: string): Promise<string> => {
-  try {
-    const screenshotPath = path.join(__dirname, "assets/test_screenshot.png");
-    const browser = await playwright.chromium.launch();
-    const page = await browser.newPage();
-    await page.goto(url);
-    await page.screenshot({ path: screenshotPath });
-    await browser.close();
-    return await getImageDescription(screenshotPath);
-  } catch (error) {
-    // console.error("Error generating content from screenshot:", error);
-    return '';
-  }
-}
-
-const getRandomLinksFromContent = async (options: { content: string, excludes: string[], limit: number }): Promise<string[]> => {
-  const regex = /(?<=\()https:\/\/(.*?)(?=\))/g;
-  const links = options.content.match(regex);
-  const filteredLinks = links ? links.filter(link => !options.excludes.some(exclude => link.includes(exclude))) : [];
-  const uniqueLinks = [...new Set(filteredLinks)]; // Ensure all links are unique
-  const randomLinks = [];
-  while (randomLinks.length < options.limit && uniqueLinks.length > 0) {
-    const randomIndex = Math.floor(Math.random() * uniqueLinks.length);
-    randomLinks.push(uniqueLinks.splice(randomIndex, 1)[0]);
-  }
-  return randomLinks;
-}
-
-function fuzzyContains(options: {
-  largeText: string,
-  queryText: string,
-  threshold?: number
-}): boolean {
-  // Normalize texts: lowercasing and removing non-alphanumeric characters
-  const normalize = (text: string) => text.toLowerCase().replace(/[^a-z0-9]+/g, ' ');
-
-  const normalizedLargeText = normalize(options.largeText);
-  const normalizedQueryText = normalize(options.queryText);
-
-  // Split the query into words
-  const queryWords = normalizedQueryText.split(/\s+/);
-
-  // Count how many query words are in the large text
-  const matchCount = queryWords.reduce((count, word) => {
-    return count + (normalizedLargeText.includes(word) ? 1 : 0);
-  }, 0);
-
-  // Calculate the percentage of words matched
-  const matchPercentage = matchCount / queryWords.length;
-
-  // Check if the match percentage meets or exceeds the threshold
-  return matchPercentage >= (options.threshold || 0.8);
-}
+  describe("Scraping website dataset", () => {
+    it("Should scrape the website and prompt it against Claude", async () => {
+      let passedTests = 0;
+      const batchSize = 5;
+      const batchPromises = [];
+      let totalTokens = 0;
+
+      const startTime = new Date().getTime();
+      const date = new Date();
+      const logsDir = `logs/${date.getMonth() + 1}-${date.getDate()}-${date.getFullYear()}`;
+
+      let errorLogFileName = `${logsDir}/run.log_${new Date().toTimeString().split(' ')[0]}`;
+      const errorLog: WebsiteScrapeError[] = [];
+
+      for (let i = 0; i < websitesData.length; i += batchSize) {
+        const batch = websitesData.slice(i, i + batchSize);
+        const batchPromise = Promise.all(
+          batch.map(async (websiteData: WebsiteData) => {
+            try {
+              const scrapedContent = await request(process.env.TEST_URL || "")
+                .post("/v0/scrape")
+                .set("Content-Type", "application/json")
+                .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+                .send({ url: websiteData.website });
+
+              if (scrapedContent.statusCode !== 200) {
+                console.error(`Failed to scrape ${websiteData.website}`);
+                return null;
+              }
+
+              const anthropic = new Anthropic({
+                apiKey: process.env.ANTHROPIC_API_KEY,
+              });
+
+              const openai = new OpenAI({
+                apiKey: process.env.OPENAI_API_KEY,
+              });
+
+              const prompt = `Based on this markdown extracted from a website html page, ${websiteData.prompt} Just say 'yes' or 'no' to the question.\nWebsite markdown: ${scrapedContent.body.data.markdown}\n`;
+
+              const msg = await openai.chat.completions.create({
+                model: "gpt-4-turbo",
+                max_tokens: 100,
+                temperature: 0,
+                messages: [
+                  {
+                    role: "user",
+                    content: prompt
+                  },
+                ],
+              });
+
+              if (!msg) {
+                console.error(`Failed to prompt for ${websiteData.website}`);
+                errorLog.push({
+                  website: websiteData.website,
+                  prompt: websiteData.prompt,
+                  expected_output: websiteData.expected_output,
+                  actual_output: "",
+                  error: "Failed to prompt... model error."
+                });
+                return null;
+              }
+
+              const actualOutput = (msg.choices[0].message.content ?? "").toLowerCase();
+              const expectedOutput = websiteData.expected_output.toLowerCase();
+
+              const numTokens = numTokensFromString(prompt, "gpt-4") + numTokensFromString(actualOutput, "gpt-4");
+
+              totalTokens += numTokens;
+              if (actualOutput.includes(expectedOutput)) {
+                passedTests++;
+              } else {
+                console.error(`This website failed the test: ${websiteData.website}`);
+                console.error(`Actual output: ${actualOutput}`);
+                errorLog.push({
+                  website: websiteData.website,
+                  prompt: websiteData.prompt,
+                  expected_output: websiteData.expected_output,
+                  actual_output: actualOutput,
+                  error: "Output mismatch"
+                });
+              }
+
+              return {
+                website: websiteData.website,
+                prompt: websiteData.prompt,
+                expectedOutput,
+                actualOutput,
+              };
+            } catch (error) {
+              console.error(`Error processing ${websiteData.website}: ${error}`);
+              return null;
+            }
+          })
+        );
+        batchPromises.push(batchPromise);
+      }
+
+      const responses = (await Promise.all(batchPromises)).flat();
+      const validResponses = responses.filter((response) => response !== null);
+      const score = (passedTests / validResponses.length) * 100;
+      const endTime = new Date().getTime();
+      const timeTaken = (endTime - startTime) / 1000;
+      console.log(`Score: ${score}%`);
+      console.log(`Total tokens: ${totalTokens}`);
+
+      if (errorLog.length > 0) {
+        if (!fs.existsSync(logsDir)) {
+          fs.mkdirSync(logsDir, { recursive: true });
+        }
+        fs.writeFileSync(errorLogFileName, JSON.stringify(errorLog, null, 2));
+        logErrors(errorLog, timeTaken, totalTokens, score);
+      }
+
+      expect(score).toBeGreaterThanOrEqual(90);
+    }, 150000); // 150 seconds timeout
+  });
+});
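Two behavioral notes on the new test. First, the it() title says the prompt goes to Claude and an Anthropic client is constructed per website, but the question is actually sent to OpenAI's gpt-4-turbo; the Anthropic client is never used here. Second, the loop pushes every batch's Promise.all onto batchPromises without awaiting, so all batches start immediately and batchSize does not actually cap concurrency. A stripped-down sketch of the pattern (the check callback is hypothetical, not from the commit):

async function runInBatches<T, R>(
  items: T[],
  batchSize: number,
  check: (item: T) => Promise<R | null>
): Promise<(R | null)[]> {
  const batchPromises: Promise<(R | null)[]>[] = [];
  for (let i = 0; i < items.length; i += batchSize) {
    const batch = items.slice(i, i + batchSize);
    // As in the test: the map callbacks start running as soon as this
    // line executes, so every batch is in flight before any is awaited.
    batchPromises.push(Promise.all(batch.map(check)));
    // Awaiting Promise.all(batch.map(check)) here instead would process
    // one batch of `batchSize` items at a time.
  }
  return (await Promise.all(batchPromises)).flat();
}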

@@ -9,6 +9,8 @@
   "license": "ISC",
   "dependencies": {
     "@anthropic-ai/sdk": "^0.20.8",
+    "@dqbd/tiktoken": "^1.0.14",
+    "@supabase/supabase-js": "^2.43.1",
     "dotenv": "^16.4.5",
     "jest": "^29.7.0",
     "openai": "^4.40.2",
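The two new dependencies map directly onto the new utilities below: @dqbd/tiktoken backs the token counting in apps/test-suite/utils/tokens.ts, and @supabase/supabase-js backs the error logging in apps/test-suite/utils/log.ts and utils/supabase.ts.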

@@ -8,6 +8,12 @@ dependencies:
   '@anthropic-ai/sdk':
     specifier: ^0.20.8
     version: 0.20.8
+  '@dqbd/tiktoken':
+    specifier: ^1.0.14
+    version: 1.0.14
+  '@supabase/supabase-js':
+    specifier: ^2.43.1
+    version: 2.43.1
   dotenv:
     specifier: ^16.4.5
     version: 16.4.5

@@ -390,6 +396,10 @@ packages:
     resolution: {integrity: sha512-0hYQ8SB4Db5zvZB4axdMHGwEaQjkZzFjQiN9LVYvIFB2nSUHW9tYpxWriPrWDASIxiaXax83REcLxuSdnGPZtw==}
     dev: false
 
+  /@dqbd/tiktoken@1.0.14:
+    resolution: {integrity: sha512-R+Z1cVYOc8ZoDls6T2YhlUYrwKyuZoRJsSK3vN7iWWjBJ1xoX7e5BhUkEh5n6cXuMWQVUTHLlSDpnyv0Ye7xxw==}
+    dev: false
+
   /@istanbuljs/load-nyc-config@1.1.0:
     resolution: {integrity: sha512-VjeHSlIzpv/NyD3N0YuHfXOPDIixcA1q2ZV98wsMqcYlPmv2n3Yb2lYP9XMElnaFVXg5A7YLTeLu6V84uQDjmQ==}
     engines: {node: '>=8'}

@@ -662,6 +672,63 @@ packages:
     '@sinonjs/commons': 3.0.1
     dev: false
 
+  /@supabase/auth-js@2.64.2:
+    resolution: {integrity: sha512-s+lkHEdGiczDrzXJ1YWt2y3bxRi+qIUnXcgkpLSrId7yjBeaXBFygNjTaoZLG02KNcYwbuZ9qkEIqmj2hF7svw==}
+    dependencies:
+      '@supabase/node-fetch': 2.6.15
+    dev: false
+
+  /@supabase/functions-js@2.3.1:
+    resolution: {integrity: sha512-QyzNle/rVzlOi4BbVqxLSH828VdGY1RElqGFAj+XeVypj6+PVtMlD21G8SDnsPQDtlqqTtoGRgdMlQZih5hTuw==}
+    dependencies:
+      '@supabase/node-fetch': 2.6.15
+    dev: false
+
+  /@supabase/node-fetch@2.6.15:
+    resolution: {integrity: sha512-1ibVeYUacxWYi9i0cf5efil6adJ9WRyZBLivgjs+AUpewx1F3xPi7gLgaASI2SmIQxPoCEjAsLAzKPgMJVgOUQ==}
+    engines: {node: 4.x || >=6.0.0}
+    dependencies:
+      whatwg-url: 5.0.0
+    dev: false
+
+  /@supabase/postgrest-js@1.15.2:
+    resolution: {integrity: sha512-9/7pUmXExvGuEK1yZhVYXPZnLEkDTwxgMQHXLrN5BwPZZm4iUCL1YEyep/Z2lIZah8d8M433mVAUEGsihUj5KQ==}
+    dependencies:
+      '@supabase/node-fetch': 2.6.15
+    dev: false
+
+  /@supabase/realtime-js@2.9.5:
+    resolution: {integrity: sha512-TEHlGwNGGmKPdeMtca1lFTYCedrhTAv3nZVoSjrKQ+wkMmaERuCe57zkC5KSWFzLYkb5FVHW8Hrr+PX1DDwplQ==}
+    dependencies:
+      '@supabase/node-fetch': 2.6.15
+      '@types/phoenix': 1.6.4
+      '@types/ws': 8.5.10
+      ws: 8.17.0
+    transitivePeerDependencies:
+      - bufferutil
+      - utf-8-validate
+    dev: false
+
+  /@supabase/storage-js@2.5.5:
+    resolution: {integrity: sha512-OpLoDRjFwClwc2cjTJZG8XviTiQH4Ik8sCiMK5v7et0MDu2QlXjCAW3ljxJB5+z/KazdMOTnySi+hysxWUPu3w==}
+    dependencies:
+      '@supabase/node-fetch': 2.6.15
+    dev: false
+
+  /@supabase/supabase-js@2.43.1:
+    resolution: {integrity: sha512-A+RV50mWNtyKo6M0u4G6AOqEifQD+MoOjZcpRkPMPpEAFgMsc2dt3kBlBlR/MgZizWQgUKhsvrwKk0efc8g6Ug==}
+    dependencies:
+      '@supabase/auth-js': 2.64.2
+      '@supabase/functions-js': 2.3.1
+      '@supabase/node-fetch': 2.6.15
+      '@supabase/postgrest-js': 1.15.2
+      '@supabase/realtime-js': 2.9.5
+      '@supabase/storage-js': 2.5.5
+    transitivePeerDependencies:
+      - bufferutil
+      - utf-8-validate
+    dev: false
+
   /@types/babel__core@7.20.5:
     resolution: {integrity: sha512-qoQprZvz5wQFJwMDqeseRXWv3rqMvhgpbXFfVyWhbx9X47POIA6i/+dXefEmZKoAgOaTdaIgNSMqMIU61yRyzA==}
     dependencies:

@@ -737,6 +804,10 @@ packages:
     dependencies:
       undici-types: 5.26.5
 
+  /@types/phoenix@1.6.4:
+    resolution: {integrity: sha512-B34A7uot1Cv0XtaHRYDATltAdKx0BvVKNgYNqE4WjtPUa4VQJM7kxeXcVKaH+KS+kCmZ+6w+QaUdcljiheiBJA==}
+    dev: false
+
   /@types/stack-utils@2.0.3:
     resolution: {integrity: sha512-9aEbYZ3TbYMznPdcdr3SmIrLXwC/AKZXQeCf9Pgao5CKb8CyHuEX5jzWPTkvregvhRJHcpRO6BFoGW9ycaOkYw==}

@@ -755,6 +826,12 @@ packages:
     '@types/superagent': 8.1.6
     dev: true
 
+  /@types/ws@8.5.10:
+    resolution: {integrity: sha512-vmQSUcfalpIq0R9q7uTo2lXs6eGIpt9wtnLdMv9LVpIjCA/+ufZRozlVoVelIYixx1ugCBKDhn89vnsEGOCx9A==}
+    dependencies:
+      '@types/node': 18.19.31
+    dev: false
+
   /@types/yargs-parser@21.0.3:
     resolution: {integrity: sha512-I4q9QU9MQv4oEOz4tAHJtNz1cwuLxn2F3xcc2iV5WdqLPpUnj30aUuxt1mAxYTG+oe8CZMV/+6rU4S4gRDzqtQ==}

@@ -2619,6 +2696,19 @@ packages:
     signal-exit: 3.0.7
     dev: false
 
+  /ws@8.17.0:
+    resolution: {integrity: sha512-uJq6108EgZMAl20KagGkzCKfMEjxmKvZHG7Tlq0Z6nOky7YF7aq4mOx6xK8TJ/i1LeK4Qus7INktacctDgY8Ow==}
+    engines: {node: '>=10.0.0'}
+    peerDependencies:
+      bufferutil: ^4.0.1
+      utf-8-validate: '>=5.0.2'
+    peerDependenciesMeta:
+      bufferutil:
+        optional: true
+      utf-8-validate:
+        optional: true
+    dev: false
+
   /y18n@5.0.8:
     resolution: {integrity: sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==}
     engines: {node: '>=10'}
apps/test-suite/utils/log.ts (new file, 10 lines)

import { supabase_service } from "./supabase";
import { WebsiteScrapeError } from "./types";

export async function logErrors(dataError: WebsiteScrapeError[], time_taken: number, num_tokens: number, score: number) {
  try {
    await supabase_service.from("test_suite_logs").insert([{ log: dataError, time_taken, num_tokens, score }]);
  } catch (error) {
    console.error(`Error logging to supabase: ${error}`);
  }
}
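logErrors performs a single insert into the test_suite_logs table and swallows any failure, so an unreachable Supabase instance degrades to a console error instead of failing the run. A usage sketch with illustrative values:

import { logErrors } from "./utils/log";
import { WebsiteScrapeError } from "./utils/types";

async function reportFailures() {
  const failures: WebsiteScrapeError[] = [{
    website: "https://example.com",   // illustrative values only
    prompt: "Does this website contain pricing information?",
    expected_output: "yes",
    actual_output: "no",
    error: "Output mismatch",
  }];
  // (errors, time_taken in seconds, num_tokens, score as a percentage)
  await logErrors(failures, 42.5, 1234, 90);
}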
apps/test-suite/utils/misc.ts (new file, 47 lines)

const getRandomLinksFromContent = async (options: {
  content: string;
  excludes: string[];
  limit: number;
}): Promise<string[]> => {
  const regex = /(?<=\()https:\/\/(.*?)(?=\))/g;
  const links = options.content.match(regex);
  const filteredLinks = links
    ? links.filter(
        (link) => !options.excludes.some((exclude) => link.includes(exclude))
      )
    : [];
  const uniqueLinks = [...new Set(filteredLinks)]; // Ensure all links are unique
  const randomLinks = [];
  while (randomLinks.length < options.limit && uniqueLinks.length > 0) {
    const randomIndex = Math.floor(Math.random() * uniqueLinks.length);
    randomLinks.push(uniqueLinks.splice(randomIndex, 1)[0]);
  }
  return randomLinks;
};

function fuzzyContains(options: {
  largeText: string;
  queryText: string;
  threshold?: number;
}): boolean {
  // Normalize texts: lowercasing and removing non-alphanumeric characters
  const normalize = (text: string) =>
    text.toLowerCase().replace(/[^a-z0-9]+/g, " ");

  const normalizedLargeText = normalize(options.largeText);
  const normalizedQueryText = normalize(options.queryText);

  // Split the query into words
  const queryWords = normalizedQueryText.split(/\s+/);

  // Count how many query words are in the large text
  const matchCount = queryWords.reduce((count, word) => {
    return count + (normalizedLargeText.includes(word) ? 1 : 0);
  }, 0);

  // Calculate the percentage of words matched
  const matchPercentage = matchCount / queryWords.length;

  // Check if the match percentage meets or exceeds the threshold
  return matchPercentage >= (options.threshold || 0.8);
}
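fuzzyContains is a word-level containment check rather than an edit-distance metric: both strings are normalized, then it measures what fraction of the query's words occur anywhere (as substrings) in the larger text. Assuming the helpers are exported from misc.ts (the committed file defines them without an export keyword), its behavior looks like this:

// true: all three query words survive normalization and occur in the text
fuzzyContains({
  largeText: "Firecrawl turns entire websites into clean markdown.",
  queryText: "CLEAN markdown... websites",
  threshold: 0.8,
});

// false: only 3 of 5 words match ("sites" counts because it is a substring
// of "websites", but "from" and "other" are absent), and 0.6 < 0.8
fuzzyContains({
  largeText: "Firecrawl turns entire websites into clean markdown.",
  queryText: "clean markdown from other sites",
  threshold: 0.8,
});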
apps/test-suite/utils/supabase.ts (new file, 56 lines)

import { createClient, SupabaseClient } from "@supabase/supabase-js";
import "dotenv/config";

// SupabaseService initializes the Supabase client conditionally based on environment variables.
class SupabaseService {
  private client: SupabaseClient | null = null;

  constructor() {
    const supabaseUrl = process.env.SUPABASE_URL;
    const supabaseServiceToken = process.env.SUPABASE_SERVICE_TOKEN;
    // Only initialize the Supabase client if both URL and Service Token are provided.
    if (process.env.USE_DB_AUTHENTICATION === "false") {
      // Warn the user that authentication is disabled and leave the client unset.
      console.warn(
        "\x1b[33mAuthentication is disabled. Supabase client will not be initialized.\x1b[0m"
      );
      this.client = null;
    } else if (!supabaseUrl || !supabaseServiceToken) {
      console.error(
        "\x1b[31mSupabase environment variables aren't configured correctly. Supabase client will not be initialized. Fix ENV configuration or disable DB authentication with USE_DB_AUTHENTICATION env variable\x1b[0m"
      );
    } else {
      this.client = createClient(supabaseUrl, supabaseServiceToken);
    }
  }

  // Provides access to the initialized Supabase client, if available.
  getClient(): SupabaseClient | null {
    return this.client;
  }
}

// Using a Proxy to handle dynamic access to the Supabase client or service methods.
// This approach ensures that if Supabase is not configured, any attempt to use it will result in a clear error.
export const supabase_service: SupabaseClient = new Proxy(
  new SupabaseService(),
  {
    get: function (target, prop, receiver) {
      const client = target.getClient();
      // If the Supabase client is not initialized, intercept property access to provide meaningful error feedback.
      if (client === null) {
        console.error(
          "Attempted to access Supabase client when it's not configured."
        );
        return () => {
          throw new Error("Supabase client is not configured.");
        };
      }
      // Direct access to SupabaseService properties takes precedence.
      if (prop in target) {
        return Reflect.get(target, prop, receiver);
      }
      // Otherwise, delegate access to the Supabase client.
      return Reflect.get(client, prop, receiver);
    },
  }
) as unknown as SupabaseClient;
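The Proxy makes a missing configuration fail at call time with a clear message instead of a null dereference: property reads are forwarded to the real SupabaseClient when one was created, and otherwise yield a function that throws. In practice (inside any async test code):

import { supabase_service } from "./utils/supabase";

// With SUPABASE_URL and SUPABASE_SERVICE_TOKEN set, this behaves like a
// normal client call:
await supabase_service.from("test_suite_logs").select("*");

// With USE_DB_AUTHENTICATION=false or missing variables, the same access
// logs "Attempted to access Supabase client when it's not configured."
// and the returned stub throws: Error: Supabase client is not configured.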
apps/test-suite/utils/tokens.ts (new file, 16 lines)

import { encoding_for_model } from "@dqbd/tiktoken";
import { TiktokenModel } from "@dqbd/tiktoken";

// Calculates the number of tokens in a text string using the encoding for the specified model
export function numTokensFromString(message: string, model: string): number {
  const encoder = encoding_for_model(model as TiktokenModel);

  // Encode the message into tokens
  const tokens = encoder.encode(message);

  // Free the encoder resources after use
  encoder.free();

  // Return the number of tokens
  return tokens.length;
}
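The suite calls this twice per website (once on the prompt, once on the model's answer) and accumulates the sum into totalTokens. For example:

import { numTokensFromString } from "./utils/tokens";

const prompt = "Does this website contain pricing information?";
// encoding_for_model maps "gpt-4" to its tiktoken encoding (cl100k_base).
const count = numTokensFromString(prompt, "gpt-4");
console.log(count); // a small integer, roughly one token per word here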
apps/test-suite/utils/types.ts (new file, 7 lines)

export interface WebsiteScrapeError {
  website: string;
  prompt: string;
  expected_output: string;
  actual_output: string;
  error: string;
}