fix(log_job): add force option to retry on supabase failure

Gergő Móricz 2024-11-15 19:55:23 +01:00
parent 7b02c45dd0
commit ca2e33db0a
2 changed files with 50 additions and 28 deletions


@@ -7,7 +7,7 @@ import { logger } from "../../lib/logger";
 import { configDotenv } from "dotenv";
 configDotenv();
-export async function logJob(job: FirecrawlJob) {
+export async function logJob(job: FirecrawlJob, force: boolean = false) {
   try {
     const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
     if (!useDbAuthentication) {
@@ -23,28 +23,52 @@ export async function logJob(job: FirecrawlJob) {
       job.scrapeOptions.headers["Authorization"] = "REDACTED";
       job.docs = [{ content: "REDACTED DUE TO AUTHORIZATION HEADER", html: "REDACTED DUE TO AUTHORIZATION HEADER" }];
     }
+    const jobColumn = {
+      job_id: job.job_id ? job.job_id : null,
+      success: job.success,
+      message: job.message,
+      num_docs: job.num_docs,
+      docs: job.docs,
+      time_taken: job.time_taken,
+      team_id: job.team_id === "preview" ? null : job.team_id,
+      mode: job.mode,
+      url: job.url,
+      crawler_options: job.crawlerOptions,
+      page_options: job.scrapeOptions,
+      origin: job.origin,
+      num_tokens: job.num_tokens,
+      retry: !!job.retry,
+      crawl_id: job.crawl_id,
+    };
-    const { data, error } = await supabase_service
-      .from("firecrawl_jobs")
-      .insert([
-        {
-          job_id: job.job_id ? job.job_id : null,
-          success: job.success,
-          message: job.message,
-          num_docs: job.num_docs,
-          docs: job.docs,
-          time_taken: job.time_taken,
-          team_id: job.team_id === "preview" ? null : job.team_id,
-          mode: job.mode,
-          url: job.url,
-          crawler_options: job.crawlerOptions,
-          page_options: job.scrapeOptions,
-          origin: job.origin,
-          num_tokens: job.num_tokens,
-          retry: !!job.retry,
-          crawl_id: job.crawl_id,
-        },
-      ]);
+    if (force) {
+      while (true) {
+        try {
+          const { error } = await supabase_service
+            .from("firecrawl_jobs")
+            .insert([jobColumn]);
+          if (error) {
+            logger.error("Failed to log job due to Supabase error -- trying again", { error, scrapeId: job.job_id });
+            await new Promise<void>((resolve) => setTimeout(() => resolve(), 75));
+          } else {
+            break;
+          }
+        } catch (error) {
+          logger.error("Failed to log job due to thrown error -- trying again", { error, scrapeId: job.job_id });
+          await new Promise<void>((resolve) => setTimeout(() => resolve(), 75));
+        }
+      }
+      logger.debug("Job logged successfully!", { scrapeId: job.job_id });
+    } else {
+      const { error } = await supabase_service
+        .from("firecrawl_jobs")
+        .insert([jobColumn]);
+      if (error) {
+        logger.error(`Error logging job: ${error.message}`, { error, scrapeId: job.job_id });
+      } else {
+        logger.debug("Job logged successfully!", { scrapeId: job.job_id });
+      }
+    }
     if (process.env.POSTHOG_API_KEY && !job.crawl_id) {
       let phLog = {
@@ -72,9 +96,7 @@ export async function logJob(job: FirecrawlJob) {
         posthog.capture(phLog);
       }
     }
-    if (error) {
-      logger.error(`Error logging job: ${error.message}`);
-    }
   } catch (error) {
     logger.error(`Error logging job: ${error.message}`);
   }
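For readers skimming the diff: the force path above is a retry-until-success insert with a fixed 75 ms pause between attempts, and it only returns once Supabase accepts the row, whether a failure surfaced as a returned error or a thrown exception. A minimal standalone sketch of that idiom follows; insertWithRetry and its parameters are illustrative assumptions, not code from this commit.

// Illustrative sketch only -- insertWithRetry is a hypothetical helper, not part of this commit.
import { SupabaseClient } from "@supabase/supabase-js";

const sleep = (ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms));

// Keep attempting the insert until it succeeds, pausing 75 ms after each failure,
// whether the failure is a returned Supabase error or a thrown exception.
async function insertWithRetry(
  client: SupabaseClient,
  table: string,
  row: Record<string, unknown>,
): Promise<void> {
  while (true) {
    try {
      const { error } = await client.from(table).insert([row]);
      if (!error) return;
      console.error("insert failed, retrying", error);
    } catch (error) {
      console.error("insert threw, retrying", error);
    }
    await sleep(75);
  }
}

The second changed file, below, simply opts the crawl-related logJob call sites in processJob into this behavior by passing true as the new second argument.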


@@ -346,7 +346,7 @@ async function processJob(job: Job & { id: string }, token: string) {
         scrapeOptions: job.data.scrapeOptions,
         origin: job.data.origin,
         crawl_id: job.data.crawl_id,
-      });
+      }, true);
       await addCrawlJobDone(job.data.crawl_id, job.id);
@@ -486,7 +486,7 @@ async function processJob(job: Job & { id: string }, token: string) {
           url: sc?.originUrl ?? (job.data.crawlerOptions === null ? "Batch Scrape" : "Unknown"),
           crawlerOptions: sc.crawlerOptions,
           origin: job.data.origin,
-        });
+        }, true);
       }
     }
   }
@@ -566,7 +566,7 @@ async function processJob(job: Job & { id: string }, token: string) {
       scrapeOptions: job.data.scrapeOptions,
       origin: job.data.origin,
       crawl_id: job.data.crawl_id,
-    });
+    }, true);
     // await logJob({
     //   job_id: job.data.crawl_id,