diff --git a/apps/api/src/services/logging/log_job.ts b/apps/api/src/services/logging/log_job.ts
index 2a813b21..642b769d 100644
--- a/apps/api/src/services/logging/log_job.ts
+++ b/apps/api/src/services/logging/log_job.ts
@@ -7,7 +7,7 @@ import { logger } from "../../lib/logger";
 import { configDotenv } from "dotenv";
 configDotenv();
 
-export async function logJob(job: FirecrawlJob) {
+export async function logJob(job: FirecrawlJob, force: boolean = false) {
   try {
     const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
     if (!useDbAuthentication) {
@@ -23,28 +23,52 @@ export async function logJob(job: FirecrawlJob) {
       job.scrapeOptions.headers["Authorization"] = "REDACTED";
       job.docs = [{ content: "REDACTED DUE TO AUTHORIZATION HEADER", html: "REDACTED DUE TO AUTHORIZATION HEADER" }];
     }
+    const jobColumn = {
+      job_id: job.job_id ? job.job_id : null,
+      success: job.success,
+      message: job.message,
+      num_docs: job.num_docs,
+      docs: job.docs,
+      time_taken: job.time_taken,
+      team_id: job.team_id === "preview" ? null : job.team_id,
+      mode: job.mode,
+      url: job.url,
+      crawler_options: job.crawlerOptions,
+      page_options: job.scrapeOptions,
+      origin: job.origin,
+      num_tokens: job.num_tokens,
+      retry: !!job.retry,
+      crawl_id: job.crawl_id,
+    };
 
-    const { data, error } = await supabase_service
-      .from("firecrawl_jobs")
-      .insert([
-        {
-          job_id: job.job_id ? job.job_id : null,
-          success: job.success,
-          message: job.message,
-          num_docs: job.num_docs,
-          docs: job.docs,
-          time_taken: job.time_taken,
-          team_id: job.team_id === "preview" ? null : job.team_id,
-          mode: job.mode,
-          url: job.url,
-          crawler_options: job.crawlerOptions,
-          page_options: job.scrapeOptions,
-          origin: job.origin,
-          num_tokens: job.num_tokens,
-          retry: !!job.retry,
-          crawl_id: job.crawl_id,
-        },
-      ]);
+    if (force) {
+      while (true) {
+        try {
+          const { error } = await supabase_service
+            .from("firecrawl_jobs")
+            .insert([jobColumn]);
+          if (error) {
+            logger.error("Failed to log job due to Supabase error -- trying again", { error, scrapeId: job.job_id });
+            await new Promise((resolve) => setTimeout(() => resolve(), 75));
+          } else {
+            break;
+          }
+        } catch (error) {
+          logger.error("Failed to log job due to thrown error -- trying again", { error, scrapeId: job.job_id });
+          await new Promise((resolve) => setTimeout(() => resolve(), 75));
+        }
+      }
+      logger.debug("Job logged successfully!", { scrapeId: job.job_id });
+    } else {
+      const { error } = await supabase_service
+        .from("firecrawl_jobs")
+        .insert([jobColumn]);
+      if (error) {
+        logger.error(`Error logging job: ${error.message}`, { error, scrapeId: job.job_id });
+      } else {
+        logger.debug("Job logged successfully!", { scrapeId: job.job_id });
+      }
+    }
 
     if (process.env.POSTHOG_API_KEY && !job.crawl_id) {
       let phLog = {
@@ -72,9 +96,7 @@ export async function logJob(job: FirecrawlJob) {
         posthog.capture(phLog);
       }
     }
-    if (error) {
-      logger.error(`Error logging job: ${error.message}`);
-    }
+
   } catch (error) {
     logger.error(`Error logging job: ${error.message}`);
   }
diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts
index 54841061..c25601ca 100644
--- a/apps/api/src/services/queue-worker.ts
+++ b/apps/api/src/services/queue-worker.ts
@@ -346,7 +346,7 @@ async function processJob(job: Job & { id: string }, token: string) {
       scrapeOptions: job.data.scrapeOptions,
       origin: job.data.origin,
       crawl_id: job.data.crawl_id,
-    });
+    }, true);
 
     await addCrawlJobDone(job.data.crawl_id, job.id);
 
@@ -486,7 +486,7 @@ async function processJob(job: Job & { id: string }, token: string) {
             url: sc?.originUrl ?? (job.data.crawlerOptions === null ? "Batch Scrape" : "Unknown"),
             crawlerOptions: sc.crawlerOptions,
             origin: job.data.origin,
-          });
+          }, true);
         }
       }
     }
@@ -566,7 +566,7 @@ async function processJob(job: Job & { id: string }, token: string) {
       scrapeOptions: job.data.scrapeOptions,
      origin: job.data.origin,
       crawl_id: job.data.crawl_id,
-    });
+    }, true);
 
     // await logJob({
     //   job_id: job.data.crawl_id,