Parse request headers properly

This commit is contained in:
Harsh Gupta (aider) 2024-08-15 15:03:41 +05:30 committed by Harsh Gupta
parent 19dc9df9cb
commit 7677ec77ce
2 changed files with 85 additions and 81 deletions

View File

@ -599,8 +599,8 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
// const rpcReflect: RPCReflection = {}; // const rpcReflect: RPCReflection = {};
const ctx = { req, res }; const ctx = { req, res };
console.log(`req.headers: ${JSON.stringify(req.headers)}`); console.log(`req.headers: ${JSON.stringify(req.headers)}`);
const crawlerOptionsHeaderOnly = CrawlerOptionsHeaderOnly.from(req.headers); const crawlerOptionsHeaderOnly = CrawlerOptionsHeaderOnly.from(req);
const crawlerOptionsParamsAllowed = CrawlerOptions.from(req.method === 'POST' ? req.body : req.query); const crawlerOptionsParamsAllowed = CrawlerOptions.from(req.method === 'POST' ? req.body : req.query, req);
const noSlashURL = ctx.req.url.slice(1); const noSlashURL = ctx.req.url.slice(1);
const crawlerOptions = ctx.req.method === 'GET' ? crawlerOptionsHeaderOnly : crawlerOptionsParamsAllowed; const crawlerOptions = ctx.req.method === 'GET' ? crawlerOptionsHeaderOnly : crawlerOptionsParamsAllowed;
console.log('Crawler options:', crawlerOptions); console.log('Crawler options:', crawlerOptions);

View File

@ -1,4 +1,4 @@
import { Also, AutoCastable, Prop, RPC_CALL_ENVIRONMENT } from 'civkit'; // Adjust the import based on where your decorators are defined import { Also, AutoCastable, Prop, AutoCastableMetaClass, Constructor } from 'civkit'; // Adjust the import based on where your decorators are defined
import type { Request, Response } from 'express'; import type { Request, Response } from 'express';
import type { CookieParam } from 'puppeteer'; import type { CookieParam } from 'puppeteer';
import { parseString as parseSetCookieString } from 'set-cookie-parser'; import { parseString as parseSetCookieString } from 'set-cookie-parser';
@ -115,7 +115,7 @@ import { parseString as parseSetCookieString } from 'set-cookie-parser';
} }
} }
}) })
export class CrawlerOptions extends AutoCastable { export class CrawlerOptions extends AutoCastable implements AutoCastableMetaClass {
@Prop() @Prop()
url?: string; url?: string;
@ -188,17 +188,15 @@ export class CrawlerOptions extends AutoCastable {
}) })
timeout?: number | null; timeout?: number | null;
static override from(input: any) { static override from<T extends CrawlerOptions>(this: Constructor<T>, input: any, ...args: any[]): T {
const instance = super.from(input) as CrawlerOptions; const instance = super.from(input, ...args) as T;
const ctx = Reflect.get(input, RPC_CALL_ENVIRONMENT); const req = args[0] as Request | undefined;
console.log('RPC_CALL_ENVIRONMENT:', ctx);
if (req) {
if (ctx && typeof ctx === 'object' && 'req' in ctx && 'res' in ctx) { console.log('Request headers:', req.headers);
const typedCtx = ctx as { req: Request, res: Response };
console.log('Request headers:', typedCtx.req.headers);
const getHeader = (name: string): string | undefined => { const getHeader = (name: string): string | undefined => {
const value = typedCtx.req.headers[name.toLowerCase()]; const value = req.headers[name.toLowerCase()];
return Array.isArray(value) ? value[0] : value; return Array.isArray(value) ? value[0] : value;
}; };
@ -211,77 +209,86 @@ export class CrawlerOptions extends AutoCastable {
if (withGeneratedAlt !== undefined) { if (withGeneratedAlt !== undefined) {
instance.withGeneratedAlt = withGeneratedAlt.toLowerCase() === 'true'; instance.withGeneratedAlt = withGeneratedAlt.toLowerCase() === 'true';
} }
} else {
console.warn('Invalid or missing RPC_CALL_ENVIRONMENT');
}
const withLinksSummary = ctx?.req.get('x-with-links-summary');
if (withLinksSummary !== undefined) {
instance.withLinksSummary = Boolean(withLinksSummary);
}
const withImagesSummary = ctx?.req.get('x-with-images-summary');
if (withImagesSummary !== undefined) {
instance.withImagesSummary = Boolean(withImagesSummary);
}
const noCache = ctx?.req.get('x-no-cache');
if (noCache !== undefined) {
instance.noCache = Boolean(noCache);
}
if (instance.noCache && instance.cacheTolerance === undefined) {
instance.cacheTolerance = 0;
}
let cacheTolerance = parseInt(ctx?.req.get('x-cache-tolerance') || '');
if (!isNaN(cacheTolerance)) {
instance.cacheTolerance = cacheTolerance;
}
let timeoutSeconds = parseInt(ctx?.req.get('x-timeout') || ''); const withLinksSummary = getHeader('x-with-links-summary');
if (!isNaN(timeoutSeconds) && timeoutSeconds > 0) { if (withLinksSummary !== undefined) {
instance.timeout = timeoutSeconds <= 180 ? timeoutSeconds : 180; instance.withLinksSummary = Boolean(withLinksSummary);
} else if (ctx?.req.get('x-timeout')) { }
instance.timeout = null;
}
const removeSelector = ctx?.req.get('x-remove-selector')?.split(', '); const withImagesSummary = getHeader('x-with-images-summary');
instance.removeSelector ??= removeSelector; if (withImagesSummary !== undefined) {
const targetSelector = ctx?.req.get('x-target-selector')?.split(', '); instance.withImagesSummary = Boolean(withImagesSummary);
instance.targetSelector ??= targetSelector; }
const waitForSelector = ctx?.req.get('x-wait-for-selector')?.split(', ');
instance.waitForSelector ??= waitForSelector || instance.targetSelector;
instance.targetSelector = filterSelector(instance.targetSelector);
const overrideUserAgent = ctx?.req.get('x-user-agent');
instance.userAgent ??= overrideUserAgent;
const keepImgDataUrl = ctx?.req.get('x-keep-img-data-url'); const noCache = getHeader('x-no-cache');
if (keepImgDataUrl !== undefined) { if (noCache !== undefined) {
instance.keepImgDataUrl = Boolean(keepImgDataUrl); instance.noCache = Boolean(noCache);
} }
const withIframe = ctx?.req.get('x-with-iframe');
if (withIframe !== undefined) {
instance.withIframe = Boolean(withIframe);
}
if (instance.withIframe) {
instance.timeout ??= null;
}
const cookies: CookieParam[] = []; if (instance.noCache && instance.cacheTolerance === undefined) {
const setCookieHeaders = ctx?.req.get('x-set-cookie')?.split(', ') || (instance.setCookies as any as string[]); instance.cacheTolerance = 0;
if (Array.isArray(setCookieHeaders)) { }
for (const setCookie of setCookieHeaders) {
let cacheTolerance = parseInt(getHeader('x-cache-tolerance') || '');
if (!isNaN(cacheTolerance)) {
instance.cacheTolerance = cacheTolerance;
}
let timeoutSeconds = parseInt(getHeader('x-timeout') || '');
if (!isNaN(timeoutSeconds) && timeoutSeconds > 0) {
instance.timeout = timeoutSeconds <= 180 ? timeoutSeconds : 180;
} else if (getHeader('x-timeout')) {
instance.timeout = null;
}
const removeSelector = getHeader('x-remove-selector')?.split(', ');
instance.removeSelector ??= removeSelector;
const targetSelector = getHeader('x-target-selector')?.split(', ');
instance.targetSelector ??= targetSelector;
const waitForSelector = getHeader('x-wait-for-selector')?.split(', ');
instance.waitForSelector ??= waitForSelector || instance.targetSelector;
instance.targetSelector = filterSelector(instance.targetSelector);
const overrideUserAgent = getHeader('x-user-agent');
instance.userAgent ??= overrideUserAgent;
const keepImgDataUrl = getHeader('x-keep-img-data-url');
if (keepImgDataUrl !== undefined) {
instance.keepImgDataUrl = Boolean(keepImgDataUrl);
}
const withIframe = getHeader('x-with-iframe');
if (withIframe !== undefined) {
instance.withIframe = Boolean(withIframe);
}
if (instance.withIframe) {
instance.timeout ??= null;
}
const cookies: CookieParam[] = [];
const setCookieHeaders = getHeader('x-set-cookie')?.split(', ') || (instance.setCookies as any as string[]);
if (Array.isArray(setCookieHeaders)) {
for (const setCookie of setCookieHeaders) {
cookies.push({
...parseSetCookieString(setCookie, { decodeValues: false }) as CookieParam,
});
}
} else if (setCookieHeaders && typeof setCookieHeaders === 'string') {
cookies.push({ cookies.push({
...parseSetCookieString(setCookie, { decodeValues: false }) as CookieParam, ...parseSetCookieString(setCookieHeaders, { decodeValues: false }) as CookieParam,
}); });
} }
} else if (setCookieHeaders && typeof setCookieHeaders === 'string') {
cookies.push({
...parseSetCookieString(setCookieHeaders, { decodeValues: false }) as CookieParam,
});
}
const proxyUrl = ctx?.req.get('x-proxy-url'); const proxyUrl = getHeader('x-proxy-url');
instance.proxyUrl ??= proxyUrl; instance.proxyUrl ??= proxyUrl;
if (instance.cacheTolerance) { if (instance.cacheTolerance) {
instance.cacheTolerance = instance.cacheTolerance * 1000; instance.cacheTolerance = instance.cacheTolerance * 1000;
}
} }
return instance; return instance;
@ -289,12 +296,9 @@ export class CrawlerOptions extends AutoCastable {
} }
export class CrawlerOptionsHeaderOnly extends CrawlerOptions { export class CrawlerOptionsHeaderOnly extends CrawlerOptions {
static override from(input: any) { static override from<T extends CrawlerOptionsHeaderOnly>(this: Constructor<T>, ...args: any[]): T {
const instance = super.from({ const req = args[0] as Request;
[RPC_CALL_ENVIRONMENT]: Reflect.get(input, RPC_CALL_ENVIRONMENT), return super.from({}, req) as T;
}) as CrawlerOptionsHeaderOnly;
return instance;
} }
} }