diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts index 36928873..c0bab316 100644 --- a/apps/api/src/services/queue-worker.ts +++ b/apps/api/src/services/queue-worker.ts @@ -102,6 +102,8 @@ process.on("SIGTERM", () => { isShuttingDown = true; }); +let cantAcceptConnectionCount = 0; + const workerFun = async ( queue: Queue, processJobInternal: (token: string, job: Job) => Promise @@ -127,8 +129,19 @@ const workerFun = async ( const canAcceptConnection = await monitor.acceptConnection(); if (!canAcceptConnection) { console.log("Cant accept connection"); + cantAcceptConnectionCount++; + + if (cantAcceptConnectionCount >= 25) { + logger.error("WORKER STALLED", { + cpuUsage: await monitor.checkCpuUsage(), + memoryUsage: await monitor.checkMemoryUsage(), + }); + } + await sleep(cantAcceptConnectionInterval); // more sleep continue; + } else { + cantAcceptConnectionCount = 0; } const job = await worker.getNextJob(token); diff --git a/apps/api/src/services/system-monitor.ts b/apps/api/src/services/system-monitor.ts index 6a630701..b5e1bf29 100644 --- a/apps/api/src/services/system-monitor.ts +++ b/apps/api/src/services/system-monitor.ts @@ -40,7 +40,7 @@ class SystemMonitor { return SystemMonitor.instance; } - private async checkMemoryUsage() { + public async checkMemoryUsage() { if (IS_KUBERNETES) { return this._checkMemoryUsageKubernetes(); } @@ -102,7 +102,7 @@ class SystemMonitor { return usedMemoryPercentage; } - private async checkCpuUsage() { + public async checkCpuUsage() { if (IS_KUBERNETES) { return this._checkCpuUsageKubernetes(); }