mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 11:42:24 +08:00
Merge branch 'v1-webscraper' of https://github.com/mendableai/firecrawl into v1-webscraper
This commit is contained in:
commit
674adee144
|
@ -33,6 +33,8 @@ const url = z.preprocess(
|
|||
)
|
||||
);
|
||||
|
||||
const strictMessage = "Unrecognized key in body -- please review the v1 API documentation for request body changes";
|
||||
|
||||
export const scrapeOptions = z.object({
|
||||
formats: z
|
||||
.enum([
|
||||
|
@ -53,14 +55,14 @@ export const scrapeOptions = z.object({
|
|||
timeout: z.number().int().positive().finite().safe().default(30000), // default?
|
||||
waitFor: z.number().int().nonnegative().finite().safe().default(0),
|
||||
parsePDF: z.boolean().default(true),
|
||||
});
|
||||
}).strict(strictMessage);
|
||||
|
||||
export type ScrapeOptions = z.infer<typeof scrapeOptions>;
|
||||
|
||||
export const scrapeRequestSchema = scrapeOptions.extend({
|
||||
url,
|
||||
origin: z.string().optional().default("api"),
|
||||
});
|
||||
}).strict(strictMessage);
|
||||
|
||||
// export type ScrapeRequest = {
|
||||
// url: string;
|
||||
|
@ -83,7 +85,7 @@ const crawlerOptions = z.object({
|
|||
allowBackwardLinks: z.boolean().default(false), // >> TODO: CHANGE THIS NAME???
|
||||
allowExternalLinks: z.boolean().default(false),
|
||||
ignoreSitemap: z.boolean().default(true),
|
||||
});
|
||||
}).strict(strictMessage);
|
||||
|
||||
// export type CrawlerOptions = {
|
||||
// includePaths?: string[];
|
||||
|
@ -97,14 +99,13 @@ const crawlerOptions = z.object({
|
|||
|
||||
export type CrawlerOptions = z.infer<typeof crawlerOptions>;
|
||||
|
||||
export const crawlRequestSchema = z.object({
|
||||
export const crawlRequestSchema = crawlerOptions.extend({
|
||||
url,
|
||||
origin: z.string().optional().default("api"),
|
||||
crawlerOptions: crawlerOptions.default({}), // TODO: Get rid of this
|
||||
scrapeOptions: scrapeOptions.omit({ timeout: true }).default({}),
|
||||
webhook: z.string().url().optional(),
|
||||
limit: z.number().default(10000), //
|
||||
});
|
||||
limit: z.number().default(10000),
|
||||
}).strict(strictMessage);
|
||||
|
||||
// export type CrawlRequest = {
|
||||
// url: string;
|
||||
|
@ -120,7 +121,7 @@ export const mapRequestSchema = crawlerOptions.extend({
|
|||
includeSubdomains: z.boolean().default(true),
|
||||
search: z.string().optional(),
|
||||
ignoreSitemap: z.boolean().default(false),
|
||||
});
|
||||
}).strict(strictMessage);
|
||||
|
||||
// export type MapRequest = {
|
||||
// url: string;
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
import "dotenv/config";
|
||||
import { CustomError } from "../lib/custom-error";
|
||||
import {
|
||||
getScrapeQueue,
|
||||
redisConnection,
|
||||
scrapeQueueName,
|
||||
} from "./queue-service";
|
||||
import "dotenv/config";
|
||||
import { logtail } from "./logtail";
|
||||
import { startWebScraperPipeline } from "../main/runWebScraper";
|
||||
import { callWebhook } from "./webhook";
|
||||
|
|
Loading…
Reference in New Issue
Block a user