Mirror of https://github.com/mendableai/firecrawl.git (synced 2024-11-16 11:42:24 +08:00)
fix crawl option conversion
Some checks are pending
STAGING Deploy Images to GHCR / push-app-image (push) Waiting to run
This commit is contained in:
parent 2a96717f67
commit cd534326ba
@@ -15,7 +15,7 @@ import { getScrapeQueue } from "../../../src/services/queue-service";
 import { checkAndUpdateURL } from "../../../src/lib/validateUrl";
 import * as Sentry from "@sentry/node";
 import { getJobPriority } from "../../lib/job-priority";
-import { fromLegacyCrawlerOptions, fromLegacyScrapeOptions, url as urlSchema } from "../v1/types";
+import { fromLegacyScrapeOptions, url as urlSchema } from "../v1/types";
 import { ZodError } from "zod";
 
 export async function crawlController(req: Request, res: Response) {
@@ -140,7 +140,7 @@ export async function crawlController(req: Request, res: Response) {
 
     const sc: StoredCrawl = {
       originUrl: url,
-      crawlerOptions: fromLegacyCrawlerOptions(crawlerOptions),
+      crawlerOptions,
       scrapeOptions,
       internalOptions,
       team_id,
@@ -177,7 +177,7 @@ export async function crawlController(req: Request, res: Response) {
         data: {
           url,
           mode: "single_urls",
-          crawlerOptions: crawlerOptions,
+          crawlerOptions,
           team_id,
           plan,
           pageOptions: pageOptions,
@@ -8,7 +8,7 @@ import { addCrawlJob, crawlToCrawler, lockURL, saveCrawl, StoredCrawl } from "..
 import { addScrapeJob } from "../../../src/services/queue-jobs";
 import { checkAndUpdateURL } from "../../../src/lib/validateUrl";
 import * as Sentry from "@sentry/node";
-import { fromLegacyCrawlerOptions, fromLegacyScrapeOptions } from "../v1/types";
+import { fromLegacyScrapeOptions } from "../v1/types";
 
 export async function crawlPreviewController(req: Request, res: Response) {
   try {
@@ -91,7 +91,7 @@ export async function crawlPreviewController(req: Request, res: Response) {
 
     const sc: StoredCrawl = {
       originUrl: url,
-      crawlerOptions: fromLegacyCrawlerOptions(crawlerOptions),
+      crawlerOptions,
       scrapeOptions,
       internalOptions,
       team_id,
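
A note on the v0 half of the fix (this is a reading of the diff, not text from the commit): as its signature further down shows, fromLegacyCrawlerOptions returns a wrapper of the form { crawlOptions, internalOptions }, not a bare options object, so the pre-fix v0 controllers were storing that whole wrapper under sc.crawlerOptions. The two hunks above now store the incoming legacy-shaped options untouched, and the v1 controller below converts its new-style options to the same legacy shape, so StoredCrawl.crawlerOptions ends up with one consistent format. A minimal sketch of the old behaviour, with hypothetical input values:

// Sketch only. The import path mirrors the v0 controllers' existing imports;
// the "..." stands for defaults filled in by the v1 schema parse.
import { fromLegacyCrawlerOptions } from "../v1/types";

const wrapped = fromLegacyCrawlerOptions({ includes: ["/docs/*"], limit: 50 });
// wrapped = {
//   crawlOptions: { /* v1-shaped options: includePaths, limit, ... */ },
//   internalOptions: { /* ... */ },
// }
// Pre-fix, this entire wrapper was written to sc.crawlerOptions; post-fix the
// legacy options object from the request body is stored as-is.
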
@@ -5,6 +5,7 @@ import {
   crawlRequestSchema,
   CrawlResponse,
   RequestWithAuth,
+  toLegacyCrawlerOptions,
 } from "./types";
 import {
   addCrawlJob,
@@ -70,7 +71,7 @@ export async function crawlController(
 
   const sc: StoredCrawl = {
     originUrl: req.body.url,
-    crawlerOptions,
+    crawlerOptions: toLegacyCrawlerOptions(crawlerOptions),
     scrapeOptions,
     internalOptions: {},
     team_id: req.auth.team_id,
@@ -440,6 +440,20 @@ export interface ResponseWithSentry<
   sentry?: string,
 }
 
+export function toLegacyCrawlerOptions(x: CrawlerOptions) {
+  return {
+    includes: x.includePaths,
+    excludes: x.excludePaths,
+    maxCrawledLinks: x.limit,
+    maxDepth: x.maxDepth,
+    limit: x.limit,
+    generateImgAltText: false,
+    allowBackwardCrawling: x.allowBackwardLinks,
+    allowExternalContentLinks: x.allowExternalLinks,
+    ignoreSitemap: x.ignoreSitemap,
+  };
+}
+
 export function fromLegacyCrawlerOptions(x: any): { crawlOptions: CrawlerOptions; internalOptions: InternalOptions } {
   return {
     crawlOptions: crawlerOptions.parse({
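
A usage sketch for the helper added above (not from the repo; the input object is hypothetical, using the v1 CrawlerOptions field names that appear in this diff). Two details of the mapping are easy to miss: both maxCrawledLinks and limit are filled from x.limit, and generateImgAltText is pinned to false rather than carried over from the request.

// Sketch only: converting parsed v1 crawler options to the legacy shape.
// Assumes CrawlerOptions is exported from the same module; the cast just
// keeps the example short (a real caller passes a fully parsed object).
import { toLegacyCrawlerOptions, CrawlerOptions } from "./types";

const legacy = toLegacyCrawlerOptions({
  includePaths: ["/blog/*"],
  excludePaths: ["/admin/*"],
  limit: 100,
  maxDepth: 3,
  allowBackwardLinks: false,
  allowExternalLinks: false,
  ignoreSitemap: true,
} as CrawlerOptions);

// legacy = {
//   includes: ["/blog/*"],
//   excludes: ["/admin/*"],
//   maxCrawledLinks: 100,          // taken from x.limit
//   maxDepth: 3,
//   limit: 100,                    // also taken from x.limit
//   generateImgAltText: false,     // hard-coded
//   allowBackwardCrawling: false,
//   allowExternalContentLinks: false,
//   ignoreSitemap: true,
// }
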
@@ -493,10 +507,10 @@ export function fromLegacyScrapeOptions(pageOptions: PageOptions, extractorOptio
   }
 }
 
-export function fromLegacyCombo(pageOptions: PageOptions, extractorOptions: ExtractorOptions | undefined, timeout: number | undefined, crawlerOptions: any): { scrapeOptions: ScrapeOptions, crawlOptions: CrawlerOptions, internalOptions: InternalOptions} {
+export function fromLegacyCombo(pageOptions: PageOptions, extractorOptions: ExtractorOptions | undefined, timeout: number | undefined, crawlerOptions: any): { scrapeOptions: ScrapeOptions, internalOptions: InternalOptions} {
   const { scrapeOptions, internalOptions: i1 } = fromLegacyScrapeOptions(pageOptions, extractorOptions, timeout);
-  const { crawlOptions, internalOptions: i2 } = fromLegacyCrawlerOptions(crawlerOptions);
-  return { scrapeOptions, crawlOptions, internalOptions: Object.assign(i1, i2) };
+  const { internalOptions: i2 } = fromLegacyCrawlerOptions(crawlerOptions);
+  return { scrapeOptions, internalOptions: Object.assign(i1, i2) };
 }
 
 export function toLegacyDocument(document: Document, internalOptions: InternalOptions): V0Document | { url: string; } {
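
On the last hunk: fromLegacyCombo now returns only { scrapeOptions, internalOptions }. Since no call sites change elsewhere in this commit, the dropped crawlOptions value was presumably unused by callers; a caller that needs crawler options keeps the legacy-shaped object it already has (or converts with toLegacyCrawlerOptions on the v1 side). A hypothetical caller, sketched under those assumptions:

// Sketch only; none of these bindings come from the repo.
import { fromLegacyCombo } from "./v1/types"; // hypothetical import path
declare const pageOptions: any;     // legacy page options from a v0 request body
declare const crawlerOptions: any;  // legacy crawler options from the same body

const { scrapeOptions, internalOptions } = fromLegacyCombo(
  pageOptions,
  undefined,        // extractorOptions
  undefined,        // timeout
  crawlerOptions,   // still consulted, but only for internalOptions now
);
// crawlOptions is no longer part of the result.
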