Parse request headers properly

This commit is contained in:
Harsh Gupta (aider) 2024-08-15 15:03:41 +05:30 committed by Harsh Gupta
parent 19dc9df9cb
commit 7677ec77ce
2 changed files with 85 additions and 81 deletions

View File

@ -599,8 +599,8 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
// const rpcReflect: RPCReflection = {};
const ctx = { req, res };
console.log(`req.headers: ${JSON.stringify(req.headers)}`);
const crawlerOptionsHeaderOnly = CrawlerOptionsHeaderOnly.from(req.headers);
const crawlerOptionsParamsAllowed = CrawlerOptions.from(req.method === 'POST' ? req.body : req.query);
const crawlerOptionsHeaderOnly = CrawlerOptionsHeaderOnly.from(req);
const crawlerOptionsParamsAllowed = CrawlerOptions.from(req.method === 'POST' ? req.body : req.query, req);
const noSlashURL = ctx.req.url.slice(1);
const crawlerOptions = ctx.req.method === 'GET' ? crawlerOptionsHeaderOnly : crawlerOptionsParamsAllowed;
console.log('Crawler options:', crawlerOptions);

View File

@ -1,4 +1,4 @@
import { Also, AutoCastable, Prop, RPC_CALL_ENVIRONMENT } from 'civkit'; // Adjust the import based on where your decorators are defined
import { Also, AutoCastable, Prop, AutoCastableMetaClass, Constructor } from 'civkit'; // Adjust the import based on where your decorators are defined
import type { Request, Response } from 'express';
import type { CookieParam } from 'puppeteer';
import { parseString as parseSetCookieString } from 'set-cookie-parser';
@ -115,7 +115,7 @@ import { parseString as parseSetCookieString } from 'set-cookie-parser';
}
}
})
export class CrawlerOptions extends AutoCastable {
export class CrawlerOptions extends AutoCastable implements AutoCastableMetaClass {
@Prop()
url?: string;
@ -188,17 +188,15 @@ export class CrawlerOptions extends AutoCastable {
})
timeout?: number | null;
static override from(input: any) {
const instance = super.from(input) as CrawlerOptions;
const ctx = Reflect.get(input, RPC_CALL_ENVIRONMENT);
console.log('RPC_CALL_ENVIRONMENT:', ctx);
static override from<T extends CrawlerOptions>(this: Constructor<T>, input: any, ...args: any[]): T {
const instance = super.from(input, ...args) as T;
const req = args[0] as Request | undefined;
if (ctx && typeof ctx === 'object' && 'req' in ctx && 'res' in ctx) {
const typedCtx = ctx as { req: Request, res: Response };
console.log('Request headers:', typedCtx.req.headers);
if (req) {
console.log('Request headers:', req.headers);
const getHeader = (name: string): string | undefined => {
const value = typedCtx.req.headers[name.toLowerCase()];
const value = req.headers[name.toLowerCase()];
return Array.isArray(value) ? value[0] : value;
};
@ -211,60 +209,68 @@ export class CrawlerOptions extends AutoCastable {
if (withGeneratedAlt !== undefined) {
instance.withGeneratedAlt = withGeneratedAlt.toLowerCase() === 'true';
}
} else {
console.warn('Invalid or missing RPC_CALL_ENVIRONMENT');
}
const withLinksSummary = ctx?.req.get('x-with-links-summary');
const withLinksSummary = getHeader('x-with-links-summary');
if (withLinksSummary !== undefined) {
instance.withLinksSummary = Boolean(withLinksSummary);
}
const withImagesSummary = ctx?.req.get('x-with-images-summary');
const withImagesSummary = getHeader('x-with-images-summary');
if (withImagesSummary !== undefined) {
instance.withImagesSummary = Boolean(withImagesSummary);
}
const noCache = ctx?.req.get('x-no-cache');
const noCache = getHeader('x-no-cache');
if (noCache !== undefined) {
instance.noCache = Boolean(noCache);
}
if (instance.noCache && instance.cacheTolerance === undefined) {
instance.cacheTolerance = 0;
}
let cacheTolerance = parseInt(ctx?.req.get('x-cache-tolerance') || '');
let cacheTolerance = parseInt(getHeader('x-cache-tolerance') || '');
if (!isNaN(cacheTolerance)) {
instance.cacheTolerance = cacheTolerance;
}
let timeoutSeconds = parseInt(ctx?.req.get('x-timeout') || '');
let timeoutSeconds = parseInt(getHeader('x-timeout') || '');
if (!isNaN(timeoutSeconds) && timeoutSeconds > 0) {
instance.timeout = timeoutSeconds <= 180 ? timeoutSeconds : 180;
} else if (ctx?.req.get('x-timeout')) {
} else if (getHeader('x-timeout')) {
instance.timeout = null;
}
const removeSelector = ctx?.req.get('x-remove-selector')?.split(', ');
const removeSelector = getHeader('x-remove-selector')?.split(', ');
instance.removeSelector ??= removeSelector;
const targetSelector = ctx?.req.get('x-target-selector')?.split(', ');
const targetSelector = getHeader('x-target-selector')?.split(', ');
instance.targetSelector ??= targetSelector;
const waitForSelector = ctx?.req.get('x-wait-for-selector')?.split(', ');
const waitForSelector = getHeader('x-wait-for-selector')?.split(', ');
instance.waitForSelector ??= waitForSelector || instance.targetSelector;
instance.targetSelector = filterSelector(instance.targetSelector);
const overrideUserAgent = ctx?.req.get('x-user-agent');
const overrideUserAgent = getHeader('x-user-agent');
instance.userAgent ??= overrideUserAgent;
const keepImgDataUrl = ctx?.req.get('x-keep-img-data-url');
const keepImgDataUrl = getHeader('x-keep-img-data-url');
if (keepImgDataUrl !== undefined) {
instance.keepImgDataUrl = Boolean(keepImgDataUrl);
}
const withIframe = ctx?.req.get('x-with-iframe');
const withIframe = getHeader('x-with-iframe');
if (withIframe !== undefined) {
instance.withIframe = Boolean(withIframe);
}
if (instance.withIframe) {
instance.timeout ??= null;
}
const cookies: CookieParam[] = [];
const setCookieHeaders = ctx?.req.get('x-set-cookie')?.split(', ') || (instance.setCookies as any as string[]);
const setCookieHeaders = getHeader('x-set-cookie')?.split(', ') || (instance.setCookies as any as string[]);
if (Array.isArray(setCookieHeaders)) {
for (const setCookie of setCookieHeaders) {
cookies.push({
@ -277,24 +283,22 @@ export class CrawlerOptions extends AutoCastable {
});
}
const proxyUrl = ctx?.req.get('x-proxy-url');
const proxyUrl = getHeader('x-proxy-url');
instance.proxyUrl ??= proxyUrl;
if (instance.cacheTolerance) {
instance.cacheTolerance = instance.cacheTolerance * 1000;
}
}
return instance;
}
}
/**
 * Variant of CrawlerOptions whose `from` does not forward caller-supplied
 * option fields: it hands `CrawlerOptions.from` only the request context and
 * lets that method fill the instance from there.
 *
 * NOTE(review): this span is taken from a commit view rendered without +/-
 * markers, so two versions of `from` appear back to back. Presumably the
 * first is the removed implementation and the second its replacement;
 * confirm against the repository before relying on either.
 */
export class CrawlerOptionsHeaderOnly extends CrawlerOptions {
// Version 1: rebuilds the input as an object whose only entry is the
// RPC_CALL_ENVIRONMENT value read off `input`, so every other field of
// `input` is dropped before delegating to the parent `from`.
static override from(input: any) {
const instance = super.from({
[RPC_CALL_ENVIRONMENT]: Reflect.get(input, RPC_CALL_ENVIRONMENT),
}) as CrawlerOptionsHeaderOnly;
return instance;
// Version 2: the first positional argument is taken to be the Express
// request (the `as Request` cast is not checked at runtime). The parent
// `from` receives an empty object as its option input and the request as
// its extra argument, so no body/query fields are passed along.
static override from<T extends CrawlerOptionsHeaderOnly>(this: Constructor<T>, ...args: any[]): T {
const req = args[0] as Request;
return super.from({}, req) as T;
}
}