Mirror of https://github.com/mendableai/firecrawl.git (synced 2024-11-16 11:42:24 +08:00)
fix(log_job): add force option to retry on supabase failure

parent 7b02c45dd0
commit ca2e33db0a

@@ -7,7 +7,7 @@ import { logger } from "../../lib/logger";
 import { configDotenv } from "dotenv";
 configDotenv();
 
-export async function logJob(job: FirecrawlJob) {
+export async function logJob(job: FirecrawlJob, force: boolean = false) {
   try {
     const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
     if (!useDbAuthentication) {
@@ -23,28 +23,52 @@ export async function logJob(job: FirecrawlJob) {
       job.scrapeOptions.headers["Authorization"] = "REDACTED";
       job.docs = [{ content: "REDACTED DUE TO AUTHORIZATION HEADER", html: "REDACTED DUE TO AUTHORIZATION HEADER" }];
     }
+    const jobColumn = {
+      job_id: job.job_id ? job.job_id : null,
+      success: job.success,
+      message: job.message,
+      num_docs: job.num_docs,
+      docs: job.docs,
+      time_taken: job.time_taken,
+      team_id: job.team_id === "preview" ? null : job.team_id,
+      mode: job.mode,
+      url: job.url,
+      crawler_options: job.crawlerOptions,
+      page_options: job.scrapeOptions,
+      origin: job.origin,
+      num_tokens: job.num_tokens,
+      retry: !!job.retry,
+      crawl_id: job.crawl_id,
+    };
 
-    const { data, error } = await supabase_service
-      .from("firecrawl_jobs")
-      .insert([
-        {
-          job_id: job.job_id ? job.job_id : null,
-          success: job.success,
-          message: job.message,
-          num_docs: job.num_docs,
-          docs: job.docs,
-          time_taken: job.time_taken,
-          team_id: job.team_id === "preview" ? null : job.team_id,
-          mode: job.mode,
-          url: job.url,
-          crawler_options: job.crawlerOptions,
-          page_options: job.scrapeOptions,
-          origin: job.origin,
-          num_tokens: job.num_tokens,
-          retry: !!job.retry,
-          crawl_id: job.crawl_id,
-        },
-      ]);
+    if (force) {
+      while (true) {
+        try {
+          const { error } = await supabase_service
+            .from("firecrawl_jobs")
+            .insert([jobColumn]);
+          if (error) {
+            logger.error("Failed to log job due to Supabase error -- trying again", { error, scrapeId: job.job_id });
+            await new Promise<void>((resolve) => setTimeout(() => resolve(), 75));
+          } else {
+            break;
+          }
+        } catch (error) {
+          logger.error("Failed to log job due to thrown error -- trying again", { error, scrapeId: job.job_id });
+          await new Promise<void>((resolve) => setTimeout(() => resolve(), 75));
+        }
+      }
+      logger.debug("Job logged successfully!", { scrapeId: job.job_id });
+    } else {
+      const { error } = await supabase_service
+        .from("firecrawl_jobs")
+        .insert([jobColumn]);
+      if (error) {
+        logger.error(`Error logging job: ${error.message}`, { error, scrapeId: job.job_id });
+      } else {
+        logger.debug("Job logged successfully!", { scrapeId: job.job_id });
+      }
+    }
 
     if (process.env.POSTHOG_API_KEY && !job.crawl_id) {
       let phLog = {
@@ -72,9 +96,7 @@ export async function logJob(job: FirecrawlJob) {
         posthog.capture(phLog);
       }
     }
-    if (error) {
-      logger.error(`Error logging job: ${error.message}`);
-    }
   } catch (error) {
     logger.error(`Error logging job: ${error.message}`);
   }

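The force path above never gives up: it re-attempts the insert every 75 ms until Supabase accepts the row, whether the failure shows up as a returned error or a thrown one. The sketch below distills that control flow into a standalone helper so it can be read outside the diff; it is a minimal sketch, not part of this commit, and the names insertWithForce and InsertResult are illustrative, assuming only an insert call that resolves to an object with an error field, as the Supabase client does.

// Hypothetical helper distilling the `force` branch of logJob: retry the insert
// every 75 ms until it neither throws nor reports an error.
type InsertResult = { error: { message: string } | null };

async function insertWithForce(insert: () => Promise<InsertResult>): Promise<void> {
  while (true) {
    try {
      const { error } = await insert();
      if (!error) break; // row persisted -- stop retrying
      console.error("insert returned an error -- trying again", error.message);
    } catch (error) {
      console.error("insert threw -- trying again", error);
    }
    // fixed 75 ms pause between attempts, matching the loop in logJob
    await new Promise<void>((resolve) => setTimeout(resolve, 75));
  }
}

The hunks that follow update the processJob call sites that record crawl completion so they opt in to this behaviour.
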
@@ -346,7 +346,7 @@ async function processJob(job: Job & { id: string }, token: string) {
         scrapeOptions: job.data.scrapeOptions,
         origin: job.data.origin,
         crawl_id: job.data.crawl_id,
-      });
+      }, true);
 
       await addCrawlJobDone(job.data.crawl_id, job.id);
 
@@ -486,7 +486,7 @@ async function processJob(job: Job & { id: string }, token: string) {
             url: sc?.originUrl ?? (job.data.crawlerOptions === null ? "Batch Scrape" : "Unknown"),
             crawlerOptions: sc.crawlerOptions,
             origin: job.data.origin,
-          });
+          }, true);
         }
       }
     }
@@ -566,7 +566,7 @@ async function processJob(job: Job & { id: string }, token: string) {
         scrapeOptions: job.data.scrapeOptions,
         origin: job.data.origin,
         crawl_id: job.data.crawl_id,
-      });
+      }, true);
 
       // await logJob({
      //   job_id: job.data.crawl_id,
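
These three processJob call sites now pass true, so the crawl-level logJob records are forced: if the insert into firecrawl_jobs fails, the call keeps retrying at 75 ms intervals instead of logging the error and dropping the row. The trade-off is that processJob blocks at that point until Supabase accepts the write.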