mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 11:42:24 +08:00
Improvements on response document types
This commit is contained in:
parent
a20d002a6b
commit
6e32522fa2
|
@ -1,11 +1,14 @@
|
|||
import FirecrawlApp, { JobStatusResponse } from '@mendable/firecrawl-js';
|
||||
import FirecrawlApp, { JobStatusResponse } from './firecrawl/src/index' //'@mendable/firecrawl-js';
|
||||
import { z } from "zod";
|
||||
|
||||
const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY"});
|
||||
|
||||
// Scrape a website:
|
||||
const scrapeResult = await app.scrapeUrl('firecrawl.dev');
|
||||
console.log(scrapeResult.data.content)
|
||||
|
||||
if (scrapeResult.data) {
|
||||
console.log(scrapeResult.data.content)
|
||||
}
|
||||
|
||||
// Crawl a website:
|
||||
const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludes: ['blog/*'], limit: 5}}, false);
|
||||
|
@ -23,12 +26,13 @@ while (true) {
|
|||
await new Promise(resolve => setTimeout(resolve, 1000)); // wait 1 second
|
||||
}
|
||||
|
||||
console.log(job.data[0].content);
|
||||
if (job.data) {
|
||||
console.log(job.data[0].content);
|
||||
}
|
||||
|
||||
// Search for a query:
|
||||
const query = 'what is mendable?'
|
||||
const searchResult = await app.search(query)
|
||||
console.log(searchResult)
|
||||
|
||||
// LLM Extraction:
|
||||
// Define schema to extract contents into using zod schema
|
||||
|
@ -50,7 +54,9 @@ let llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com", {
|
|||
extractorOptions: { extractionSchema: zodSchema },
|
||||
});
|
||||
|
||||
console.log(llmExtractionResult.data.llm_extraction);
|
||||
if (llmExtractionResult.data) {
|
||||
console.log(llmExtractionResult.data.llm_extraction);
|
||||
}
|
||||
|
||||
// Define schema to extract contents into using json schema
|
||||
const jsonSchema = {
|
||||
|
@ -80,4 +86,7 @@ llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com", {
|
|||
extractorOptions: { extractionSchema: jsonSchema },
|
||||
});
|
||||
|
||||
console.log(llmExtractionResult.data.llm_extraction);
|
||||
if (llmExtractionResult.data) {
|
||||
console.log(llmExtractionResult.data.llm_extraction);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "@mendable/firecrawl-js",
|
||||
"version": "0.0.26",
|
||||
"version": "0.0.27",
|
||||
"description": "JavaScript SDK for Firecrawl API",
|
||||
"main": "build/index.js",
|
||||
"types": "types/index.d.ts",
|
||||
|
|
|
@ -10,56 +10,112 @@ export interface FirecrawlAppConfig {
|
|||
}
|
||||
|
||||
/**
|
||||
* Generic parameter interface.
|
||||
* Metadata for a Firecrawl document.
|
||||
*/
|
||||
export interface Params {
|
||||
export interface FirecrawlDocumentMetadata {
|
||||
title?: string;
|
||||
description?: string;
|
||||
language?: string;
|
||||
keywords?: string;
|
||||
robots?: string;
|
||||
ogTitle?: string;
|
||||
ogDescription?: string;
|
||||
ogUrl?: string;
|
||||
ogImage?: string;
|
||||
ogAudio?: string;
|
||||
ogDeterminer?: string;
|
||||
ogLocale?: string;
|
||||
ogLocaleAlternate?: string[];
|
||||
ogSiteName?: string;
|
||||
ogVideo?: string;
|
||||
dctermsCreated?: string;
|
||||
dcDateCreated?: string;
|
||||
dcDate?: string;
|
||||
dctermsType?: string;
|
||||
dcType?: string;
|
||||
dctermsAudience?: string;
|
||||
dctermsSubject?: string;
|
||||
dcSubject?: string;
|
||||
dcDescription?: string;
|
||||
dctermsKeywords?: string;
|
||||
modifiedTime?: string;
|
||||
publishedTime?: string;
|
||||
articleTag?: string;
|
||||
articleSection?: string;
|
||||
sourceURL?: string;
|
||||
pageStatusCode?: number;
|
||||
pageError?: string;
|
||||
[key: string]: any;
|
||||
extractorOptions?: {
|
||||
extractionSchema: z.ZodSchema | any;
|
||||
mode?: "llm-extraction";
|
||||
extractionPrompt?: string;
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Document interface for Firecrawl.
|
||||
*/
|
||||
export interface FirecrawlDocument {
|
||||
id?: string;
|
||||
url?: string;
|
||||
content: string;
|
||||
markdown?: string;
|
||||
html?: string;
|
||||
llm_extraction?: Record<string, any>;
|
||||
createdAt?: Date;
|
||||
updatedAt?: Date;
|
||||
type?: string;
|
||||
metadata: FirecrawlDocumentMetadata;
|
||||
childrenLinks?: string[];
|
||||
provider?: string;
|
||||
warning?: string;
|
||||
|
||||
index?: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Response interface for scraping operations.
|
||||
*/
|
||||
export interface ScrapeResponse {
|
||||
success: boolean;
|
||||
data?: any;
|
||||
error?: string;
|
||||
success: boolean;
|
||||
data?: FirecrawlDocument;
|
||||
error?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Response interface for searching operations.
|
||||
*/
|
||||
* Response interface for searching operations.
|
||||
*/
|
||||
export interface SearchResponse {
|
||||
success: boolean;
|
||||
data?: any;
|
||||
error?: string;
|
||||
success: boolean;
|
||||
data?: FirecrawlDocument[];
|
||||
error?: string;
|
||||
}
|
||||
/**
|
||||
* Response interface for crawling operations.
|
||||
*/
|
||||
* Response interface for crawling operations.
|
||||
*/
|
||||
export interface CrawlResponse {
|
||||
success: boolean;
|
||||
jobId?: string;
|
||||
data?: any;
|
||||
error?: string;
|
||||
success: boolean;
|
||||
jobId?: string;
|
||||
data?: FirecrawlDocument[];
|
||||
error?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Response interface for job status checks.
|
||||
*/
|
||||
* Response interface for job status checks.
|
||||
*/
|
||||
export interface JobStatusResponse {
|
||||
success: boolean;
|
||||
status: string;
|
||||
jobId?: string;
|
||||
data?: any;
|
||||
partial_data?: any,
|
||||
error?: string;
|
||||
success: boolean;
|
||||
status: string;
|
||||
jobId?: string;
|
||||
data?: FirecrawlDocument[];
|
||||
partial_data?: FirecrawlDocument[];
|
||||
error?: string;
|
||||
}
|
||||
/**
|
||||
* Generic parameter interface.
|
||||
*/
|
||||
export interface Params {
|
||||
[key: string]: any;
|
||||
extractorOptions?: {
|
||||
extractionSchema: z.ZodSchema | any;
|
||||
mode?: "llm-extraction";
|
||||
extractionPrompt?: string;
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Main class for interacting with the Firecrawl API.
|
||||
*/
|
||||
|
|
131
apps/js-sdk/firecrawl/types/index.d.ts
vendored
131
apps/js-sdk/firecrawl/types/index.d.ts
vendored
|
@ -8,8 +8,101 @@ export interface FirecrawlAppConfig {
|
|||
apiUrl?: string | null;
|
||||
}
|
||||
/**
|
||||
* Generic parameter interface.
|
||||
* Metadata for a Firecrawl document.
|
||||
*/
|
||||
export interface FirecrawlDocumentMetadata {
|
||||
title?: string;
|
||||
description?: string;
|
||||
language?: string;
|
||||
keywords?: string;
|
||||
robots?: string;
|
||||
ogTitle?: string;
|
||||
ogDescription?: string;
|
||||
ogUrl?: string;
|
||||
ogImage?: string;
|
||||
ogAudio?: string;
|
||||
ogDeterminer?: string;
|
||||
ogLocale?: string;
|
||||
ogLocaleAlternate?: string[];
|
||||
ogSiteName?: string;
|
||||
ogVideo?: string;
|
||||
dctermsCreated?: string;
|
||||
dcDateCreated?: string;
|
||||
dcDate?: string;
|
||||
dctermsType?: string;
|
||||
dcType?: string;
|
||||
dctermsAudience?: string;
|
||||
dctermsSubject?: string;
|
||||
dcSubject?: string;
|
||||
dcDescription?: string;
|
||||
dctermsKeywords?: string;
|
||||
modifiedTime?: string;
|
||||
publishedTime?: string;
|
||||
articleTag?: string;
|
||||
articleSection?: string;
|
||||
sourceURL?: string;
|
||||
pageStatusCode?: number;
|
||||
pageError?: string;
|
||||
[key: string]: any;
|
||||
}
|
||||
/**
|
||||
* Document interface for Firecrawl.
|
||||
*/
|
||||
export interface FirecrawlDocument {
|
||||
id?: string;
|
||||
url?: string;
|
||||
content: string;
|
||||
markdown?: string;
|
||||
html?: string;
|
||||
llm_extraction?: Record<string, any>;
|
||||
createdAt?: Date;
|
||||
updatedAt?: Date;
|
||||
type?: string;
|
||||
metadata: FirecrawlDocumentMetadata;
|
||||
childrenLinks?: string[];
|
||||
provider?: string;
|
||||
warning?: string;
|
||||
index?: number;
|
||||
}
|
||||
/**
|
||||
* Response interface for scraping operations.
|
||||
*/
|
||||
export interface ScrapeResponse {
|
||||
success: boolean;
|
||||
data?: FirecrawlDocument;
|
||||
error?: string;
|
||||
}
|
||||
/**
|
||||
* Response interface for searching operations.
|
||||
*/
|
||||
export interface SearchResponse {
|
||||
success: boolean;
|
||||
data?: FirecrawlDocument[];
|
||||
error?: string;
|
||||
}
|
||||
/**
|
||||
* Response interface for crawling operations.
|
||||
*/
|
||||
export interface CrawlResponse {
|
||||
success: boolean;
|
||||
jobId?: string;
|
||||
data?: FirecrawlDocument[];
|
||||
error?: string;
|
||||
}
|
||||
/**
|
||||
* Response interface for job status checks.
|
||||
*/
|
||||
export interface JobStatusResponse {
|
||||
success: boolean;
|
||||
status: string;
|
||||
jobId?: string;
|
||||
data?: FirecrawlDocument[];
|
||||
partial_data?: FirecrawlDocument[];
|
||||
error?: string;
|
||||
}
|
||||
/**
|
||||
* Generic parameter interface.
|
||||
*/
|
||||
export interface Params {
|
||||
[key: string]: any;
|
||||
extractorOptions?: {
|
||||
|
@ -18,42 +111,6 @@ export interface Params {
|
|||
extractionPrompt?: string;
|
||||
};
|
||||
}
|
||||
/**
|
||||
* Response interface for scraping operations.
|
||||
*/
|
||||
export interface ScrapeResponse {
|
||||
success: boolean;
|
||||
data?: any;
|
||||
error?: string;
|
||||
}
|
||||
/**
|
||||
* Response interface for searching operations.
|
||||
*/
|
||||
export interface SearchResponse {
|
||||
success: boolean;
|
||||
data?: any;
|
||||
error?: string;
|
||||
}
|
||||
/**
|
||||
* Response interface for crawling operations.
|
||||
*/
|
||||
export interface CrawlResponse {
|
||||
success: boolean;
|
||||
jobId?: string;
|
||||
data?: any;
|
||||
error?: string;
|
||||
}
|
||||
/**
|
||||
* Response interface for job status checks.
|
||||
*/
|
||||
export interface JobStatusResponse {
|
||||
success: boolean;
|
||||
status: string;
|
||||
jobId?: string;
|
||||
data?: any;
|
||||
partial_data?: any;
|
||||
error?: string;
|
||||
}
|
||||
/**
|
||||
* Main class for interacting with the Firecrawl API.
|
||||
*/
|
||||
|
|
Loading…
Reference in New Issue
Block a user