From 7081beff1f96a55af1f81adc603ce00a50e60772 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?=
Date: Tue, 12 Nov 2024 22:26:36 +0100
Subject: [PATCH] fix(scrapeURL/pdf): retry

---
 apps/api/src/scraper/scrapeURL/engines/pdf/index.ts | 2 ++
 apps/api/src/scraper/scrapeURL/lib/fetch.ts         | 7 ++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/apps/api/src/scraper/scrapeURL/engines/pdf/index.ts b/apps/api/src/scraper/scrapeURL/engines/pdf/index.ts
index 8b42ee71..d0591b57 100644
--- a/apps/api/src/scraper/scrapeURL/engines/pdf/index.ts
+++ b/apps/api/src/scraper/scrapeURL/engines/pdf/index.ts
@@ -62,6 +62,8 @@ async function scrapePDFWithLlamaParse(meta: Meta, tempFilePath: string): Promis
         schema: z.object({
             markdown: z.string(),
         }),
+        tryCount: 16,
+        tryCooldown: 250,
     });
 
     return {
diff --git a/apps/api/src/scraper/scrapeURL/lib/fetch.ts b/apps/api/src/scraper/scrapeURL/lib/fetch.ts
index 738e240e..09a280b8 100644
--- a/apps/api/src/scraper/scrapeURL/lib/fetch.ts
+++ b/apps/api/src/scraper/scrapeURL/lib/fetch.ts
@@ -15,6 +15,7 @@ export type RobustFetchParams<Schema extends z.Schema<any>> = {
     ignoreFailure?: boolean;
     requestId?: string;
     tryCount?: number;
+    tryCooldown?: number;
 };
 
 export async function robustFetch<Schema extends z.Schema<any>, Output = z.infer<Schema>>({
@@ -28,8 +29,9 @@ export async function robustFetch<Schema extends z.Schema<any>, Output = z.infer
     ignoreFailure = false,
     requestId = uuid(),
     tryCount = 1,
+    tryCooldown,
 }: RobustFetchParams<Schema>): Promise<Output> {
-    const params = { url, logger, method, body, headers, schema, ignoreResponse, ignoreFailure, tryCount };
+    const params = { url, logger, method, body, headers, schema, ignoreResponse, ignoreFailure, tryCount, tryCooldown };
 
     let request: Response;
     try {
@@ -86,6 +88,9 @@ export async function robustFetch<Schema extends z.Schema<any>, Output = z.infer
     if (request.status >= 300) {
         if (tryCount > 1) {
             logger.debug("Request sent failure status, trying " + (tryCount - 1) + " more times", { params, request, response, requestId });
+            if (tryCooldown !== undefined) {
+                await new Promise((resolve) => setTimeout(() => resolve(null), tryCooldown));
+            }
             return await robustFetch({
                 ...params,
                 requestId,