mirror of
https://github.com/intergalacticalvariable/reader.git
synced 2024-11-16 11:42:32 +08:00
fix: another approach to suspected DoS abuse
This commit is contained in:
parent
e658e8102c
commit
0a33207f8f
|
@@ -375,12 +375,22 @@ export class CrawlerHost extends RPCHost {
|
|||
let contentText = '';
|
||||
const imageSummary = {} as { [k: string]: string; };
|
||||
const imageIdxTrack = new Map<string, number[]>();
|
||||
const uid = this.threadLocal.get('uid');
|
||||
do {
|
||||
if (pdfMode) {
|
||||
contentText = snapshot.parsed?.content || snapshot.text;
|
||||
break;
|
||||
}
|
||||
|
||||
if (
|
||||
snapshot.maxElemDepth! > 256 ||
|
||||
(!uid && snapshot.elemCount! > 10_000) ||
|
||||
snapshot.text.length > 70_000
|
||||
) {
|
||||
contentText = snapshot.text;
|
||||
break;
|
||||
}
|
||||
|
||||
const jsDomElementOfHTML = this.jsdomControl.snippetToElement(snapshot.html, snapshot.href);
|
||||
let toBeTurnedToMd = jsDomElementOfHTML;
|
||||
let turnDownService = this.getTurndown({ url: nominalUrl, imgDataUrlToObjectUrl });
|
||||
|
|
|
@@ -334,6 +334,15 @@ export class SearcherHost extends RPCHost {
|
|||
r.description = upstreamSearchResult.description;
|
||||
|
||||
return r;
|
||||
}).catch((err)=> {
|
||||
this.logger.error(`Failed to format snapshot for ${urls[i].href}`, { err: marshalErrorLike(err) });
|
||||
|
||||
return {
|
||||
url: upstreamSearchResult.url,
|
||||
title: upstreamSearchResult.title,
|
||||
description: upstreamSearchResult.description,
|
||||
content: x.text,
|
||||
};
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
@@ -11,7 +11,6 @@ import puppeteerBlockResources from 'puppeteer-extra-plugin-block-resources';
|
|||
import puppeteerPageProxy from 'puppeteer-extra-plugin-page-proxy';
|
||||
import { SecurityCompromiseError, ServiceCrashedError } from '../shared/lib/errors';
|
||||
import { TimeoutError } from 'puppeteer';
|
||||
import { AsyncContext } from '../shared';
|
||||
const tldExtract = require('tld-extract');
|
||||
|
||||
const READABILITY_JS = fs.readFileSync(require.resolve('@mozilla/readability/Readability.js'), 'utf-8');
|
||||
|
@@ -129,7 +128,7 @@ function getMaxDepthAndCountUsingTreeWalker(root) {
|
|||
NodeFilter.SHOW_ELEMENT,
|
||||
(node) => {
|
||||
const nodeName = node.nodeName.toLowerCase();
|
||||
return (nodeName === 'svg' || nodeName === 'code') ? NodeFilter.FILTER_REJECT : NodeFilter.FILTER_ACCEPT;
|
||||
return (nodeName === 'svg') ? NodeFilter.FILTER_REJECT : NodeFilter.FILTER_ACCEPT;
|
||||
},
|
||||
false
|
||||
);
|
||||
|
@@ -215,7 +214,6 @@ export class PuppeteerControl extends AsyncService {
|
|||
|
||||
constructor(
|
||||
protected globalLogger: Logger,
|
||||
protected threadLocal: AsyncContext,
|
||||
) {
|
||||
super(...arguments);
|
||||
this.setMaxListeners(2 * Math.floor(os.totalmem() / (256 * 1024 * 1024)) + 1); 148 - 95;
|
||||
|
@@ -491,17 +489,13 @@ document.addEventListener('load', handlePageLoad);
|
|||
if (snapshot === s) {
|
||||
return;
|
||||
}
|
||||
snapshot = s;
|
||||
if (s?.maxElemDepth && s.maxElemDepth > 256) {
|
||||
page.emit('abuse', { url, page, sn, reason: `DoS attack suspected: DOM tree too deep` });
|
||||
return;
|
||||
}
|
||||
if (s?.elemCount && s.elemCount > 20_000) {
|
||||
if (!this.threadLocal.get('uid')) {
|
||||
page.emit('abuse', { url, page, sn, reason: `DoS attack suspected: too many DOM elements` });
|
||||
return;
|
||||
}
|
||||
if (s?.elemCount && s.elemCount > 10_000) {
|
||||
return;
|
||||
}
|
||||
snapshot = s;
|
||||
nextSnapshotDeferred.resolve(s);
|
||||
nextSnapshotDeferred = Defer();
|
||||
this.once('crippled', crippleListener);
|
||||
|
|
Loading…
Reference in New Issue
Block a user