mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 11:42:24 +08:00
Nick: geolocation
This commit is contained in:
parent
f49552e413
commit
b4f6a0f919
|
@ -4,6 +4,7 @@ import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist";
|
|||
import { Action, ExtractorOptions, PageOptions } from "../../lib/entities";
|
||||
import { protocolIncluded, checkUrl } from "../../lib/validateUrl";
|
||||
import { PlanType } from "../../types";
|
||||
import { countries } from "../../lib/validate-country";
|
||||
|
||||
export type Format =
|
||||
| "markdown"
|
||||
|
@ -108,6 +109,14 @@ export const scrapeOptions = z.object({
|
|||
extract: extractOptions.optional(),
|
||||
parsePDF: z.boolean().default(true),
|
||||
actions: actionsSchema.optional(),
|
||||
geolocation: z.object({
|
||||
country: z.string().optional().refine(
|
||||
(val) => !val || Object.keys(countries).includes(val.toUpperCase()),
|
||||
{
|
||||
message: "Invalid country code. Please use a valid ISO 3166-1 alpha-2 country code.",
|
||||
}
|
||||
).transform(val => val ? val.toUpperCase() : 'US')
|
||||
}).optional(),
|
||||
}).strict(strictMessage)
|
||||
|
||||
|
||||
|
@ -421,6 +430,7 @@ export function legacyScrapeOptions(x: ScrapeOptions): PageOptions {
|
|||
fullPageScreenshot: x.formats.includes("screenshot@fullPage"),
|
||||
parsePDF: x.parsePDF,
|
||||
actions: x.actions as Action[], // no strict null checking grrrr - mogery
|
||||
geolocation: x.geolocation,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -51,6 +51,9 @@ export type PageOptions = {
|
|||
disableJsDom?: boolean; // beta
|
||||
atsv?: boolean; // anti-bot solver, beta
|
||||
actions?: Action[]; // beta
|
||||
geolocation?: {
|
||||
country?: string;
|
||||
};
|
||||
};
|
||||
|
||||
export type ExtractorOptions = {
|
||||
|
|
2261
apps/api/src/lib/validate-country.ts
Normal file
2261
apps/api/src/lib/validate-country.ts
Normal file
File diff suppressed because it is too large
Load Diff
|
@ -593,6 +593,7 @@ export class WebScraperDataProvider {
|
|||
disableJsDom: options.pageOptions?.disableJsDom ?? false,
|
||||
atsv: options.pageOptions?.atsv ?? false,
|
||||
actions: options.pageOptions?.actions ?? undefined,
|
||||
geolocation: options.pageOptions?.geolocation ?? undefined,
|
||||
};
|
||||
this.extractorOptions = options.extractorOptions ?? { mode: "markdown" };
|
||||
this.replaceAllPathsWithAbsolutePaths =
|
||||
|
|
|
@ -28,7 +28,7 @@ export async function scrapWithFireEngine({
|
|||
waitFor = 0,
|
||||
screenshot = false,
|
||||
fullPageScreenshot = false,
|
||||
pageOptions = { parsePDF: true, atsv: false, useFastMode: false, disableJsDom: false },
|
||||
pageOptions = { parsePDF: true, atsv: false, useFastMode: false, disableJsDom: false, geolocation: { country: "us" } },
|
||||
fireEngineOptions = {},
|
||||
headers,
|
||||
options,
|
||||
|
@ -40,7 +40,7 @@ export async function scrapWithFireEngine({
|
|||
waitFor?: number;
|
||||
screenshot?: boolean;
|
||||
fullPageScreenshot?: boolean;
|
||||
pageOptions?: { scrollXPaths?: string[]; parsePDF?: boolean, atsv?: boolean, useFastMode?: boolean, disableJsDom?: boolean };
|
||||
pageOptions?: { scrollXPaths?: string[]; parsePDF?: boolean, atsv?: boolean, useFastMode?: boolean, disableJsDom?: boolean, geolocation?: { country?: string } };
|
||||
fireEngineOptions?: FireEngineOptions;
|
||||
headers?: Record<string, string>;
|
||||
options?: any;
|
||||
|
@ -118,6 +118,7 @@ export async function scrapWithFireEngine({
|
|||
...fireEngineOptionsParam,
|
||||
atsv: pageOptions?.atsv ?? false,
|
||||
scrollXPaths: pageOptions?.scrollXPaths ?? [],
|
||||
geolocation: pageOptions?.geolocation,
|
||||
actions: actions,
|
||||
},
|
||||
{
|
||||
|
|
|
@ -156,6 +156,7 @@ export async function scrapSingleUrl(
|
|||
disableJsDom: pageOptions.disableJsDom ?? false,
|
||||
atsv: pageOptions.atsv ?? false,
|
||||
actions: pageOptions.actions ?? undefined,
|
||||
geolocation: pageOptions.geolocation ?? undefined,
|
||||
}
|
||||
|
||||
if (extractorOptions) {
|
||||
|
|
Loading…
Reference in New Issue
Block a user