mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 11:42:24 +08:00
Nick: geolocation
This commit is contained in:
parent
f49552e413
commit
b4f6a0f919
|
@ -4,6 +4,7 @@ import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist";
|
||||||
import { Action, ExtractorOptions, PageOptions } from "../../lib/entities";
|
import { Action, ExtractorOptions, PageOptions } from "../../lib/entities";
|
||||||
import { protocolIncluded, checkUrl } from "../../lib/validateUrl";
|
import { protocolIncluded, checkUrl } from "../../lib/validateUrl";
|
||||||
import { PlanType } from "../../types";
|
import { PlanType } from "../../types";
|
||||||
|
import { countries } from "../../lib/validate-country";
|
||||||
|
|
||||||
export type Format =
|
export type Format =
|
||||||
| "markdown"
|
| "markdown"
|
||||||
|
@ -108,6 +109,14 @@ export const scrapeOptions = z.object({
|
||||||
extract: extractOptions.optional(),
|
extract: extractOptions.optional(),
|
||||||
parsePDF: z.boolean().default(true),
|
parsePDF: z.boolean().default(true),
|
||||||
actions: actionsSchema.optional(),
|
actions: actionsSchema.optional(),
|
||||||
|
geolocation: z.object({
|
||||||
|
country: z.string().optional().refine(
|
||||||
|
(val) => !val || Object.keys(countries).includes(val.toUpperCase()),
|
||||||
|
{
|
||||||
|
message: "Invalid country code. Please use a valid ISO 3166-1 alpha-2 country code.",
|
||||||
|
}
|
||||||
|
).transform(val => val ? val.toUpperCase() : 'US')
|
||||||
|
}).optional(),
|
||||||
}).strict(strictMessage)
|
}).strict(strictMessage)
|
||||||
|
|
||||||
|
|
||||||
|
@ -421,6 +430,7 @@ export function legacyScrapeOptions(x: ScrapeOptions): PageOptions {
|
||||||
fullPageScreenshot: x.formats.includes("screenshot@fullPage"),
|
fullPageScreenshot: x.formats.includes("screenshot@fullPage"),
|
||||||
parsePDF: x.parsePDF,
|
parsePDF: x.parsePDF,
|
||||||
actions: x.actions as Action[], // no strict null checking grrrr - mogery
|
actions: x.actions as Action[], // no strict null checking grrrr - mogery
|
||||||
|
geolocation: x.geolocation,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -51,6 +51,9 @@ export type PageOptions = {
|
||||||
disableJsDom?: boolean; // beta
|
disableJsDom?: boolean; // beta
|
||||||
atsv?: boolean; // anti-bot solver, beta
|
atsv?: boolean; // anti-bot solver, beta
|
||||||
actions?: Action[]; // beta
|
actions?: Action[]; // beta
|
||||||
|
geolocation?: {
|
||||||
|
country?: string;
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
export type ExtractorOptions = {
|
export type ExtractorOptions = {
|
||||||
|
|
2261
apps/api/src/lib/validate-country.ts
Normal file
2261
apps/api/src/lib/validate-country.ts
Normal file
File diff suppressed because it is too large
Load Diff
|
@ -593,6 +593,7 @@ export class WebScraperDataProvider {
|
||||||
disableJsDom: options.pageOptions?.disableJsDom ?? false,
|
disableJsDom: options.pageOptions?.disableJsDom ?? false,
|
||||||
atsv: options.pageOptions?.atsv ?? false,
|
atsv: options.pageOptions?.atsv ?? false,
|
||||||
actions: options.pageOptions?.actions ?? undefined,
|
actions: options.pageOptions?.actions ?? undefined,
|
||||||
|
geolocation: options.pageOptions?.geolocation ?? undefined,
|
||||||
};
|
};
|
||||||
this.extractorOptions = options.extractorOptions ?? { mode: "markdown" };
|
this.extractorOptions = options.extractorOptions ?? { mode: "markdown" };
|
||||||
this.replaceAllPathsWithAbsolutePaths =
|
this.replaceAllPathsWithAbsolutePaths =
|
||||||
|
|
|
@ -28,7 +28,7 @@ export async function scrapWithFireEngine({
|
||||||
waitFor = 0,
|
waitFor = 0,
|
||||||
screenshot = false,
|
screenshot = false,
|
||||||
fullPageScreenshot = false,
|
fullPageScreenshot = false,
|
||||||
pageOptions = { parsePDF: true, atsv: false, useFastMode: false, disableJsDom: false },
|
pageOptions = { parsePDF: true, atsv: false, useFastMode: false, disableJsDom: false, geolocation: { country: "us" } },
|
||||||
fireEngineOptions = {},
|
fireEngineOptions = {},
|
||||||
headers,
|
headers,
|
||||||
options,
|
options,
|
||||||
|
@ -40,7 +40,7 @@ export async function scrapWithFireEngine({
|
||||||
waitFor?: number;
|
waitFor?: number;
|
||||||
screenshot?: boolean;
|
screenshot?: boolean;
|
||||||
fullPageScreenshot?: boolean;
|
fullPageScreenshot?: boolean;
|
||||||
pageOptions?: { scrollXPaths?: string[]; parsePDF?: boolean, atsv?: boolean, useFastMode?: boolean, disableJsDom?: boolean };
|
pageOptions?: { scrollXPaths?: string[]; parsePDF?: boolean, atsv?: boolean, useFastMode?: boolean, disableJsDom?: boolean, geolocation?: { country?: string } };
|
||||||
fireEngineOptions?: FireEngineOptions;
|
fireEngineOptions?: FireEngineOptions;
|
||||||
headers?: Record<string, string>;
|
headers?: Record<string, string>;
|
||||||
options?: any;
|
options?: any;
|
||||||
|
@ -118,6 +118,7 @@ export async function scrapWithFireEngine({
|
||||||
...fireEngineOptionsParam,
|
...fireEngineOptionsParam,
|
||||||
atsv: pageOptions?.atsv ?? false,
|
atsv: pageOptions?.atsv ?? false,
|
||||||
scrollXPaths: pageOptions?.scrollXPaths ?? [],
|
scrollXPaths: pageOptions?.scrollXPaths ?? [],
|
||||||
|
geolocation: pageOptions?.geolocation,
|
||||||
actions: actions,
|
actions: actions,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
@ -156,6 +156,7 @@ export async function scrapSingleUrl(
|
||||||
disableJsDom: pageOptions.disableJsDom ?? false,
|
disableJsDom: pageOptions.disableJsDom ?? false,
|
||||||
atsv: pageOptions.atsv ?? false,
|
atsv: pageOptions.atsv ?? false,
|
||||||
actions: pageOptions.actions ?? undefined,
|
actions: pageOptions.actions ?? undefined,
|
||||||
|
geolocation: pageOptions.geolocation ?? undefined,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (extractorOptions) {
|
if (extractorOptions) {
|
||||||
|
|
Loading…
Reference in New Issue
Block a user