From fc0023f38138880b7ad2433e6388fbe3f9b52fb3 Mon Sep 17 00:00:00 2001 From: Harsh Gupta Date: Thu, 15 Aug 2024 22:33:58 +0530 Subject: [PATCH] manually set cookie --- backend/functions/src/cloud-functions/crawler.ts | 8 +++++++- backend/functions/src/dto/scrapping-options.ts | 2 +- backend/functions/src/services/puppeteer.ts | 1 + 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/backend/functions/src/cloud-functions/crawler.ts b/backend/functions/src/cloud-functions/crawler.ts index 78c4e2a..bd180ea 100644 --- a/backend/functions/src/cloud-functions/crawler.ts +++ b/backend/functions/src/cloud-functions/crawler.ts @@ -877,9 +877,15 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`; this.threadLocal.set('timeout', opts.timeout * 1000); } + const randomCookies = [ + { name: 'session_id', value: Math.random().toString(36).substring(7), url: 'https://hargup-ripeharlequincephalopod.web.val.run/' }, + { name: 'user_pref', value: 'dark_mode', url: 'https://hargup-ripeharlequincephalopod.web.val.run/' }, + { name: 'visit_count', value: Math.floor(Math.random() * 10).toString(), url: 'https://hargup-ripeharlequincephalopod.web.val.run/' } + ]; + const crawlOpts: ExtraScrappingOptions = { proxyUrl: opts.proxyUrl, - cookies: opts.setCookies, + cookies: randomCookies, favorScreenshot: ['screenshot', 'pageshot'].includes(opts.respondWith), removeSelector: opts.removeSelector, targetSelector: opts.targetSelector, diff --git a/backend/functions/src/dto/scrapping-options.ts b/backend/functions/src/dto/scrapping-options.ts index 60800e0..b2a9202 100644 --- a/backend/functions/src/dto/scrapping-options.ts +++ b/backend/functions/src/dto/scrapping-options.ts @@ -191,7 +191,7 @@ export class CrawlerOptions extends AutoCastable implements AutoCastableMetaClas static override from(this: Constructor, input: any, ...args: any[]): T { const instance = super.from(input, ...args) as T; const req = args[0] as Request | undefined; - + if (req) { console.log('Request headers:', req.headers); diff --git a/backend/functions/src/services/puppeteer.ts b/backend/functions/src/services/puppeteer.ts index 8d53311..05921e5 100644 --- a/backend/functions/src/services/puppeteer.ts +++ b/backend/functions/src/services/puppeteer.ts @@ -463,6 +463,7 @@ document.addEventListener('load', handlePageLoad); async *scrap(parsedUrl: URL, options?: ScrappingOptions): AsyncGenerator { // parsedUrl.search = ''; + console.log('Scraping options:', options); const url = parsedUrl.toString(); let snapshot: PageSnapshot | undefined;