Merge pull request #847 from mendableai/nsc/mobile-support
Some checks failed
Deploy Images to GHCR / push-app-image (push) Has been cancelled

Adds support for mobile web scraping and mobile screenshots.
This commit is contained in:
Eric Ciarla 2024-11-02 11:16:33 -04:00 committed by GitHub
commit 2eff27ba43
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 9 additions and 3 deletions

View File

@@ -116,6 +116,7 @@ export const scrapeOptions = z.object({
timeout: z.number().int().positive().finite().safe().default(30000), timeout: z.number().int().positive().finite().safe().default(30000),
waitFor: z.number().int().nonnegative().finite().safe().default(0), waitFor: z.number().int().nonnegative().finite().safe().default(0),
extract: extractOptions.optional(), extract: extractOptions.optional(),
mobile: z.boolean().default(false),
parsePDF: z.boolean().default(true), parsePDF: z.boolean().default(true),
actions: actionsSchema.optional(), actions: actionsSchema.optional(),
// New // New
@@ -468,7 +469,8 @@ export function legacyScrapeOptions(x: ScrapeOptions): PageOptions {
parsePDF: x.parsePDF, parsePDF: x.parsePDF,
actions: x.actions as Action[], // no strict null checking grrrr - mogery actions: x.actions as Action[], // no strict null checking grrrr - mogery
geolocation: x.location ?? x.geolocation, geolocation: x.location ?? x.geolocation,
skipTlsVerification: x.skipTlsVerification skipTlsVerification: x.skipTlsVerification,
mobile: x.mobile,
}; };
} }

View File

@@ -58,6 +58,7 @@ export type PageOptions = {
country?: string; country?: string;
}; };
skipTlsVerification?: boolean; skipTlsVerification?: boolean;
mobile?: boolean;
}; };
export type ExtractorOptions = { export type ExtractorOptions = {

View File

@@ -595,6 +595,7 @@ export class WebScraperDataProvider {
actions: options.pageOptions?.actions ?? undefined, actions: options.pageOptions?.actions ?? undefined,
geolocation: options.pageOptions?.geolocation ?? undefined, geolocation: options.pageOptions?.geolocation ?? undefined,
skipTlsVerification: options.pageOptions?.skipTlsVerification ?? false, skipTlsVerification: options.pageOptions?.skipTlsVerification ?? false,
mobile: options.pageOptions?.mobile ?? false,
}; };
this.extractorOptions = options.extractorOptions ?? { mode: "markdown" }; this.extractorOptions = options.extractorOptions ?? { mode: "markdown" };
this.replaceAllPathsWithAbsolutePaths = this.replaceAllPathsWithAbsolutePaths =

View File

@@ -28,7 +28,7 @@ export async function scrapWithFireEngine({
waitFor = 0, waitFor = 0,
screenshot = false, screenshot = false,
fullPageScreenshot = false, fullPageScreenshot = false,
pageOptions = { parsePDF: true, atsv: false, useFastMode: false, disableJsDom: false, geolocation: { country: "US" }, skipTlsVerification: false }, pageOptions = { parsePDF: true, atsv: false, useFastMode: false, disableJsDom: false, geolocation: { country: "US" }, skipTlsVerification: false, mobile: false },
fireEngineOptions = {}, fireEngineOptions = {},
headers, headers,
options, options,
@@ -40,7 +40,7 @@ export async function scrapWithFireEngine({
waitFor?: number; waitFor?: number;
screenshot?: boolean; screenshot?: boolean;
fullPageScreenshot?: boolean; fullPageScreenshot?: boolean;
pageOptions?: { scrollXPaths?: string[]; parsePDF?: boolean, atsv?: boolean, useFastMode?: boolean, disableJsDom?: boolean, geolocation?: { country?: string }, skipTlsVerification?: boolean }; pageOptions?: { scrollXPaths?: string[]; parsePDF?: boolean, atsv?: boolean, useFastMode?: boolean, disableJsDom?: boolean, geolocation?: { country?: string }, skipTlsVerification?: boolean, mobile?: boolean };
fireEngineOptions?: FireEngineOptions; fireEngineOptions?: FireEngineOptions;
headers?: Record<string, string>; headers?: Record<string, string>;
options?: any; options?: any;
@@ -115,6 +115,7 @@ export async function scrapWithFireEngine({
priority, priority,
engine, engine,
instantReturn: true, instantReturn: true,
mobile: pageOptions?.mobile ?? false,
...fireEngineOptionsParam, ...fireEngineOptionsParam,
atsv: pageOptions?.atsv ?? false, atsv: pageOptions?.atsv ?? false,
scrollXPaths: pageOptions?.scrollXPaths ?? [], scrollXPaths: pageOptions?.scrollXPaths ?? [],

View File

@@ -159,6 +159,7 @@ export async function scrapSingleUrl(
actions: pageOptions.actions ?? undefined, actions: pageOptions.actions ?? undefined,
geolocation: pageOptions.geolocation ?? undefined, geolocation: pageOptions.geolocation ?? undefined,
skipTlsVerification: pageOptions.skipTlsVerification ?? false, skipTlsVerification: pageOptions.skipTlsVerification ?? false,
mobile: pageOptions.mobile ?? false,
} }
if (extractorOptions) { if (extractorOptions) {