feat: Read cookies from the x-set-cookie request header and set them in crawlOpts; each cookie's URL is taken from the request parameters

This commit is contained in:
Harsh Gupta (aider) 2024-08-15 22:40:49 +05:30
parent fc0023f381
commit 1a5f2eb408

View File

@ -674,7 +674,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
}); });
} }
const crawlOpts = this.configure(crawlerOptions); const crawlOpts = this.configure(crawlerOptions, req);
console.log('Configured crawl options:', crawlOpts); console.log('Configured crawl options:', crawlOpts);
if (!ctx.req.accepts('text/plain') && ctx.req.accepts('text/event-stream')) { if (!ctx.req.accepts('text/plain') && ctx.req.accepts('text/event-stream')) {
@ -865,7 +865,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
} }
} }
configure(opts: CrawlerOptions) { configure(opts: CrawlerOptions, req: Request) {
this.threadLocal.set('withGeneratedAlt', opts.withGeneratedAlt); this.threadLocal.set('withGeneratedAlt', opts.withGeneratedAlt);
this.threadLocal.set('withLinksSummary', opts.withLinksSummary); this.threadLocal.set('withLinksSummary', opts.withLinksSummary);
@ -877,15 +877,17 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
this.threadLocal.set('timeout', opts.timeout * 1000); this.threadLocal.set('timeout', opts.timeout * 1000);
} }
const randomCookies = [ const cookies = req.headers['x-set-cookie'] ?
{ name: 'session_id', value: Math.random().toString(36).substring(7), url: 'https://hargup-ripeharlequincephalopod.web.val.run/' }, (Array.isArray(req.headers['x-set-cookie']) ? req.headers['x-set-cookie'] : [req.headers['x-set-cookie']])
{ name: 'user_pref', value: 'dark_mode', url: 'https://hargup-ripeharlequincephalopod.web.val.run/' }, .map(cookie => {
{ name: 'visit_count', value: Math.floor(Math.random() * 10).toString(), url: 'https://hargup-ripeharlequincephalopod.web.val.run/' } const [name, value] = cookie.split('=');
]; return { name, value, url: opts.url || req.query.url as string };
})
: [];
const crawlOpts: ExtraScrappingOptions = { const crawlOpts: ExtraScrappingOptions = {
proxyUrl: opts.proxyUrl, proxyUrl: opts.proxyUrl,
cookies: randomCookies, cookies: cookies,
favorScreenshot: ['screenshot', 'pageshot'].includes(opts.respondWith), favorScreenshot: ['screenshot', 'pageshot'].includes(opts.respondWith),
removeSelector: opts.removeSelector, removeSelector: opts.removeSelector,
targetSelector: opts.targetSelector, targetSelector: opts.targetSelector,