fix: abuse blocker

This commit is contained in:
Yanlong Wang 2024-06-01 02:01:12 +08:00
parent 249408df6b
commit d2bebec60f
No known key found for this signature in database
GPG Key ID: C0A623C0BADF9F37
4 changed files with 10 additions and 6 deletions

View File

@@ -38,6 +38,7 @@
"set-cookie-parser": "^2.6.0",
"stripe": "^11.11.0",
"tiktoken": "^1.0.10",
"tld-extract": "^2.1.0",
"turndown": "^7.1.3",
"turndown-plugin-gfm": "^1.0.2",
"undici": "^5.24.0"
@@ -11306,8 +11307,7 @@
"node_modules/tld-extract": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/tld-extract/-/tld-extract-2.1.0.tgz",
"integrity": "sha512-Y9QHWIoDQPJJVm3/pOC7kOfOj7vsNSVZl4JGoEHb605FiwZgIfzSMyU0HC0wYw5Cx8435vaG1yGZtIm1yiQGOw==",
"optional": true
"integrity": "sha512-Y9QHWIoDQPJJVm3/pOC7kOfOj7vsNSVZl4JGoEHb605FiwZgIfzSMyU0HC0wYw5Cx8435vaG1yGZtIm1yiQGOw=="
},
"node_modules/tmpl": {
"version": "1.0.5",

View File

@@ -58,6 +58,7 @@
"set-cookie-parser": "^2.6.0",
"stripe": "^11.11.0",
"tiktoken": "^1.0.10",
"tld-extract": "^2.1.0",
"turndown": "^7.1.3",
"turndown-plugin-gfm": "^1.0.2",
"undici": "^5.24.0"

View File

@@ -65,7 +65,7 @@ export class CrawlerHost extends RPCHost {
cacheRetentionMs = 1000 * 3600 * 24 * 7;
cacheValidMs = 1000 * 3600;
urlValidMs = 1000 * 3600 * 4;
abuseBlockMs = 1000 * 3600 * 24;
abuseBlockMs = 1000 * 3600;
constructor(
protected globalLogger: Logger,

View File

@@ -12,6 +12,7 @@ import puppeteerBlockResources from 'puppeteer-extra-plugin-block-resources';
import puppeteerPageProxy from 'puppeteer-extra-plugin-page-proxy';
import { SecurityCompromiseError, ServiceCrashedError } from '../shared/lib/errors';
import { Readability } from '@mozilla/readability';
const tldExtract = require('tld-extract');
const READABILITY_JS = fs.readFileSync(require.resolve('@mozilla/readability/Readability.js'), 'utf-8');
@@ -279,8 +280,10 @@ function giveSnapshot(stopActiveSnapshot) {
if (!requestUrl.startsWith("http:") && !requestUrl.startsWith("https:") && requestUrl !== 'about:blank') {
return req.abort('blockedbyclient', 1000);
}
const tldParsed = tldExtract(requestUrl);
domainSet.add(tldParsed.domain);
const parsedUrl = new URL(requestUrl);
domainSet.add(parsedUrl.hostname);
if (
parsedUrl.hostname === 'localhost' ||
@@ -291,13 +294,13 @@ function giveSnapshot(stopActiveSnapshot) {
return req.abort('blockedbyclient', 1000);
}
if (reqCounter > 200) {
if (reqCounter > 2000) {
page.emit('abuse', { url: requestUrl, page, sn, reason: `DDoS attack suspected: Too many requests: ${reqCounter}` });
return req.abort('blockedbyclient', 1000);
}
if (domainSet.size > 51) {
if (domainSet.size > 200) {
page.emit('abuse', { url: requestUrl, page, sn, reason: `DDoS attack suspected: Too many domains (${domainSet.size})` });
return req.abort('blockedbyclient', 1000);