firecrawl/apps/js-sdk/example.js

85 lines
2.2 KiB
JavaScript
Raw Normal View History

2024-05-23 22:47:04 +08:00
import { v4 as uuidv4 } from 'uuid';
2024-04-17 00:31:16 +08:00
import FirecrawlApp from '@mendable/firecrawl-js';
2024-05-09 21:36:56 +08:00
import { z } from "zod";
2024-04-16 22:38:22 +08:00
2024-05-09 21:36:56 +08:00
const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY"});
2024-04-16 22:38:22 +08:00
2024-05-09 21:36:56 +08:00
// Scrape a website:
const scrapeResult = await app.scrapeUrl('firecrawl.dev');
console.log(scrapeResult.data.content)
// Crawl a website:
2024-05-23 22:47:04 +08:00
const idempotencyKey = uuidv4(); // optional
const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludes: ['blog/*'], limit: 5}}, false, 2, idempotencyKey);
2024-04-16 22:38:22 +08:00
console.log(crawlResult)
const jobId = await crawlResult['jobId'];
console.log(jobId);
let job;
while (true) {
job = await app.checkCrawlStatus(jobId);
if (job.status == 'completed') {
break;
}
await new Promise(resolve => setTimeout(resolve, 1000)); // wait 1 second
}
2024-05-09 21:36:56 +08:00
console.log(job.data[0].content);
// Search for a query:
const query = 'what is mendable?'
const searchResult = await app.search(query)
console.log(searchResult)
// LLM Extraction:
// Define schema to extract contents into using zod schema
const zodSchema = z.object({
top: z
.array(
z.object({
title: z.string(),
points: z.number(),
by: z.string(),
commentsURL: z.string(),
})
)
.length(5)
.describe("Top 5 stories on Hacker News"),
});
let llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com", {
extractorOptions: { extractionSchema: zodSchema },
});
console.log(llmExtractionResult.data.llm_extraction);
// Define schema to extract contents into using json schema
const jsonSchema = {
"type": "object",
"properties": {
"top": {
"type": "array",
"items": {
"type": "object",
"properties": {
"title": {"type": "string"},
"points": {"type": "number"},
"by": {"type": "string"},
"commentsURL": {"type": "string"}
},
"required": ["title", "points", "by", "commentsURL"]
},
"minItems": 5,
"maxItems": 5,
"description": "Top 5 stories on Hacker News"
}
},
"required": ["top"]
}
llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com", {
extractorOptions: { extractionSchema: jsonSchema },
});
console.log(llmExtractionResult.data.llm_extraction);