From 867636d037908f9c149a246ba36143429350ff80 Mon Sep 17 00:00:00 2001 From: "yanlong.wang" Date: Mon, 29 Apr 2024 18:54:51 +0800 Subject: [PATCH] fix: apply rate limit to 100qpm per IP --- backend/functions/src/cloud-functions/crawler.ts | 11 ++++++++++- thinapps-shared | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/backend/functions/src/cloud-functions/crawler.ts b/backend/functions/src/cloud-functions/crawler.ts index 0ea08f6..7a353e4 100644 --- a/backend/functions/src/cloud-functions/crawler.ts +++ b/backend/functions/src/cloud-functions/crawler.ts @@ -6,6 +6,7 @@ import { } from 'civkit'; import { singleton } from 'tsyringe'; import { CloudHTTPv2, Ctx, FirebaseStorageBucketControl, Logger, OutputServerEventStream, RPCReflect } from '../shared'; +import { RateLimitControl } from '../shared/services/rate-limit'; import _ from 'lodash'; import { PageSnapshot, PuppeteerControl, ScrappingOptions } from '../services/puppeteer'; import { Request, Response } from 'express'; @@ -36,6 +37,7 @@ export class CrawlerHost extends RPCHost { protected puppeteerControl: PuppeteerControl, protected altTextService: AltTextService, protected firebaseObjectStorage: FirebaseStorageBucketControl, + protected rateLimitControl: RateLimitControl, ) { super(...arguments); @@ -113,7 +115,7 @@ export class CrawlerHost extends RPCHost { } const toBeTurnedToMd = mode === 'markdown' ? snapshot.html : snapshot.parsed?.content; - let turnDownService = mode === 'markdown' ? this.getTurndown() : this.getTurndown('without any rule'); + let turnDownService = mode === 'markdown' ? this.getTurndown() : this.getTurndown('without any rule'); for (const plugin of this.turnDownPlugins) { turnDownService = turnDownService.use(plugin); } @@ -295,6 +297,13 @@ ${this.content} res: Response, }, ) { + if (ctx.req.ip) { + await this.rateLimitControl.simpleRpcIPBasedLimit(rpcReflect, ctx.req.ip, ['CRAWL'], [ + // 100 requests per minute + new Date(Date.now() - 60 * 1000), 100 + ]); + } + const noSlashURL = ctx.req.url.slice(1); if (!noSlashURL) { return assignTransferProtocolMeta(`[Usage] https://r.jina.ai/YOUR_URL diff --git a/thinapps-shared b/thinapps-shared index b165c1c..e681cf8 160000 --- a/thinapps-shared +++ b/thinapps-shared @@ -1 +1 @@ -Subproject commit b165c1cb0e21b7b8762a23b8ce88219aa532c293 +Subproject commit e681cf89bd21d77469dd286b2348e4cf5fce76e7