mirror of
https://github.com/intergalacticalvariable/reader.git
synced 2024-11-16 11:42:32 +08:00
Parse request headers properly
This commit is contained in:
parent
19dc9df9cb
commit
7677ec77ce
|
@ -599,8 +599,8 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
|
||||||
// const rpcReflect: RPCReflection = {};
|
// const rpcReflect: RPCReflection = {};
|
||||||
const ctx = { req, res };
|
const ctx = { req, res };
|
||||||
console.log(`req.headers: ${JSON.stringify(req.headers)}`);
|
console.log(`req.headers: ${JSON.stringify(req.headers)}`);
|
||||||
const crawlerOptionsHeaderOnly = CrawlerOptionsHeaderOnly.from(req.headers);
|
const crawlerOptionsHeaderOnly = CrawlerOptionsHeaderOnly.from(req);
|
||||||
const crawlerOptionsParamsAllowed = CrawlerOptions.from(req.method === 'POST' ? req.body : req.query);
|
const crawlerOptionsParamsAllowed = CrawlerOptions.from(req.method === 'POST' ? req.body : req.query, req);
|
||||||
const noSlashURL = ctx.req.url.slice(1);
|
const noSlashURL = ctx.req.url.slice(1);
|
||||||
const crawlerOptions = ctx.req.method === 'GET' ? crawlerOptionsHeaderOnly : crawlerOptionsParamsAllowed;
|
const crawlerOptions = ctx.req.method === 'GET' ? crawlerOptionsHeaderOnly : crawlerOptionsParamsAllowed;
|
||||||
console.log('Crawler options:', crawlerOptions);
|
console.log('Crawler options:', crawlerOptions);
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
import { Also, AutoCastable, Prop, RPC_CALL_ENVIRONMENT } from 'civkit'; // Adjust the import based on where your decorators are defined
|
import { Also, AutoCastable, Prop, AutoCastableMetaClass, Constructor } from 'civkit'; // Adjust the import based on where your decorators are defined
|
||||||
import type { Request, Response } from 'express';
|
import type { Request, Response } from 'express';
|
||||||
import type { CookieParam } from 'puppeteer';
|
import type { CookieParam } from 'puppeteer';
|
||||||
import { parseString as parseSetCookieString } from 'set-cookie-parser';
|
import { parseString as parseSetCookieString } from 'set-cookie-parser';
|
||||||
|
@ -115,7 +115,7 @@ import { parseString as parseSetCookieString } from 'set-cookie-parser';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
export class CrawlerOptions extends AutoCastable {
|
export class CrawlerOptions extends AutoCastable implements AutoCastableMetaClass {
|
||||||
|
|
||||||
@Prop()
|
@Prop()
|
||||||
url?: string;
|
url?: string;
|
||||||
|
@ -188,17 +188,15 @@ export class CrawlerOptions extends AutoCastable {
|
||||||
})
|
})
|
||||||
timeout?: number | null;
|
timeout?: number | null;
|
||||||
|
|
||||||
static override from(input: any) {
|
static override from<T extends CrawlerOptions>(this: Constructor<T>, input: any, ...args: any[]): T {
|
||||||
const instance = super.from(input) as CrawlerOptions;
|
const instance = super.from(input, ...args) as T;
|
||||||
const ctx = Reflect.get(input, RPC_CALL_ENVIRONMENT);
|
const req = args[0] as Request | undefined;
|
||||||
console.log('RPC_CALL_ENVIRONMENT:', ctx);
|
|
||||||
|
if (req) {
|
||||||
if (ctx && typeof ctx === 'object' && 'req' in ctx && 'res' in ctx) {
|
console.log('Request headers:', req.headers);
|
||||||
const typedCtx = ctx as { req: Request, res: Response };
|
|
||||||
console.log('Request headers:', typedCtx.req.headers);
|
|
||||||
|
|
||||||
const getHeader = (name: string): string | undefined => {
|
const getHeader = (name: string): string | undefined => {
|
||||||
const value = typedCtx.req.headers[name.toLowerCase()];
|
const value = req.headers[name.toLowerCase()];
|
||||||
return Array.isArray(value) ? value[0] : value;
|
return Array.isArray(value) ? value[0] : value;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -211,77 +209,86 @@ export class CrawlerOptions extends AutoCastable {
|
||||||
if (withGeneratedAlt !== undefined) {
|
if (withGeneratedAlt !== undefined) {
|
||||||
instance.withGeneratedAlt = withGeneratedAlt.toLowerCase() === 'true';
|
instance.withGeneratedAlt = withGeneratedAlt.toLowerCase() === 'true';
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
console.warn('Invalid or missing RPC_CALL_ENVIRONMENT');
|
|
||||||
}
|
|
||||||
const withLinksSummary = ctx?.req.get('x-with-links-summary');
|
|
||||||
if (withLinksSummary !== undefined) {
|
|
||||||
instance.withLinksSummary = Boolean(withLinksSummary);
|
|
||||||
}
|
|
||||||
const withImagesSummary = ctx?.req.get('x-with-images-summary');
|
|
||||||
if (withImagesSummary !== undefined) {
|
|
||||||
instance.withImagesSummary = Boolean(withImagesSummary);
|
|
||||||
}
|
|
||||||
const noCache = ctx?.req.get('x-no-cache');
|
|
||||||
if (noCache !== undefined) {
|
|
||||||
instance.noCache = Boolean(noCache);
|
|
||||||
}
|
|
||||||
if (instance.noCache && instance.cacheTolerance === undefined) {
|
|
||||||
instance.cacheTolerance = 0;
|
|
||||||
}
|
|
||||||
let cacheTolerance = parseInt(ctx?.req.get('x-cache-tolerance') || '');
|
|
||||||
if (!isNaN(cacheTolerance)) {
|
|
||||||
instance.cacheTolerance = cacheTolerance;
|
|
||||||
}
|
|
||||||
|
|
||||||
let timeoutSeconds = parseInt(ctx?.req.get('x-timeout') || '');
|
const withLinksSummary = getHeader('x-with-links-summary');
|
||||||
if (!isNaN(timeoutSeconds) && timeoutSeconds > 0) {
|
if (withLinksSummary !== undefined) {
|
||||||
instance.timeout = timeoutSeconds <= 180 ? timeoutSeconds : 180;
|
instance.withLinksSummary = Boolean(withLinksSummary);
|
||||||
} else if (ctx?.req.get('x-timeout')) {
|
}
|
||||||
instance.timeout = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
const removeSelector = ctx?.req.get('x-remove-selector')?.split(', ');
|
const withImagesSummary = getHeader('x-with-images-summary');
|
||||||
instance.removeSelector ??= removeSelector;
|
if (withImagesSummary !== undefined) {
|
||||||
const targetSelector = ctx?.req.get('x-target-selector')?.split(', ');
|
instance.withImagesSummary = Boolean(withImagesSummary);
|
||||||
instance.targetSelector ??= targetSelector;
|
}
|
||||||
const waitForSelector = ctx?.req.get('x-wait-for-selector')?.split(', ');
|
|
||||||
instance.waitForSelector ??= waitForSelector || instance.targetSelector;
|
|
||||||
instance.targetSelector = filterSelector(instance.targetSelector);
|
|
||||||
const overrideUserAgent = ctx?.req.get('x-user-agent');
|
|
||||||
instance.userAgent ??= overrideUserAgent;
|
|
||||||
|
|
||||||
const keepImgDataUrl = ctx?.req.get('x-keep-img-data-url');
|
const noCache = getHeader('x-no-cache');
|
||||||
if (keepImgDataUrl !== undefined) {
|
if (noCache !== undefined) {
|
||||||
instance.keepImgDataUrl = Boolean(keepImgDataUrl);
|
instance.noCache = Boolean(noCache);
|
||||||
}
|
}
|
||||||
const withIframe = ctx?.req.get('x-with-iframe');
|
|
||||||
if (withIframe !== undefined) {
|
|
||||||
instance.withIframe = Boolean(withIframe);
|
|
||||||
}
|
|
||||||
if (instance.withIframe) {
|
|
||||||
instance.timeout ??= null;
|
|
||||||
}
|
|
||||||
|
|
||||||
const cookies: CookieParam[] = [];
|
if (instance.noCache && instance.cacheTolerance === undefined) {
|
||||||
const setCookieHeaders = ctx?.req.get('x-set-cookie')?.split(', ') || (instance.setCookies as any as string[]);
|
instance.cacheTolerance = 0;
|
||||||
if (Array.isArray(setCookieHeaders)) {
|
}
|
||||||
for (const setCookie of setCookieHeaders) {
|
|
||||||
|
let cacheTolerance = parseInt(getHeader('x-cache-tolerance') || '');
|
||||||
|
if (!isNaN(cacheTolerance)) {
|
||||||
|
instance.cacheTolerance = cacheTolerance;
|
||||||
|
}
|
||||||
|
|
||||||
|
let timeoutSeconds = parseInt(getHeader('x-timeout') || '');
|
||||||
|
if (!isNaN(timeoutSeconds) && timeoutSeconds > 0) {
|
||||||
|
instance.timeout = timeoutSeconds <= 180 ? timeoutSeconds : 180;
|
||||||
|
} else if (getHeader('x-timeout')) {
|
||||||
|
instance.timeout = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
const removeSelector = getHeader('x-remove-selector')?.split(', ');
|
||||||
|
instance.removeSelector ??= removeSelector;
|
||||||
|
|
||||||
|
const targetSelector = getHeader('x-target-selector')?.split(', ');
|
||||||
|
instance.targetSelector ??= targetSelector;
|
||||||
|
|
||||||
|
const waitForSelector = getHeader('x-wait-for-selector')?.split(', ');
|
||||||
|
instance.waitForSelector ??= waitForSelector || instance.targetSelector;
|
||||||
|
|
||||||
|
instance.targetSelector = filterSelector(instance.targetSelector);
|
||||||
|
|
||||||
|
const overrideUserAgent = getHeader('x-user-agent');
|
||||||
|
instance.userAgent ??= overrideUserAgent;
|
||||||
|
|
||||||
|
const keepImgDataUrl = getHeader('x-keep-img-data-url');
|
||||||
|
if (keepImgDataUrl !== undefined) {
|
||||||
|
instance.keepImgDataUrl = Boolean(keepImgDataUrl);
|
||||||
|
}
|
||||||
|
|
||||||
|
const withIframe = getHeader('x-with-iframe');
|
||||||
|
if (withIframe !== undefined) {
|
||||||
|
instance.withIframe = Boolean(withIframe);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (instance.withIframe) {
|
||||||
|
instance.timeout ??= null;
|
||||||
|
}
|
||||||
|
|
||||||
|
const cookies: CookieParam[] = [];
|
||||||
|
const setCookieHeaders = getHeader('x-set-cookie')?.split(', ') || (instance.setCookies as any as string[]);
|
||||||
|
if (Array.isArray(setCookieHeaders)) {
|
||||||
|
for (const setCookie of setCookieHeaders) {
|
||||||
|
cookies.push({
|
||||||
|
...parseSetCookieString(setCookie, { decodeValues: false }) as CookieParam,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
} else if (setCookieHeaders && typeof setCookieHeaders === 'string') {
|
||||||
cookies.push({
|
cookies.push({
|
||||||
...parseSetCookieString(setCookie, { decodeValues: false }) as CookieParam,
|
...parseSetCookieString(setCookieHeaders, { decodeValues: false }) as CookieParam,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
} else if (setCookieHeaders && typeof setCookieHeaders === 'string') {
|
|
||||||
cookies.push({
|
|
||||||
...parseSetCookieString(setCookieHeaders, { decodeValues: false }) as CookieParam,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
const proxyUrl = ctx?.req.get('x-proxy-url');
|
const proxyUrl = getHeader('x-proxy-url');
|
||||||
instance.proxyUrl ??= proxyUrl;
|
instance.proxyUrl ??= proxyUrl;
|
||||||
|
|
||||||
if (instance.cacheTolerance) {
|
if (instance.cacheTolerance) {
|
||||||
instance.cacheTolerance = instance.cacheTolerance * 1000;
|
instance.cacheTolerance = instance.cacheTolerance * 1000;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return instance;
|
return instance;
|
||||||
|
@ -289,12 +296,9 @@ export class CrawlerOptions extends AutoCastable {
|
||||||
}
|
}
|
||||||
|
|
||||||
export class CrawlerOptionsHeaderOnly extends CrawlerOptions {
|
export class CrawlerOptionsHeaderOnly extends CrawlerOptions {
|
||||||
static override from(input: any) {
|
static override from<T extends CrawlerOptionsHeaderOnly>(this: Constructor<T>, ...args: any[]): T {
|
||||||
const instance = super.from({
|
const req = args[0] as Request;
|
||||||
[RPC_CALL_ENVIRONMENT]: Reflect.get(input, RPC_CALL_ENVIRONMENT),
|
return super.from({}, req) as T;
|
||||||
}) as CrawlerOptionsHeaderOnly;
|
|
||||||
|
|
||||||
return instance;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user