mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 11:42:24 +08:00
Merge pull request #653 from mendableai/mog/fix-status-job-get
fix(v0/crawl-status): don't crash on big crawls when requesting jobs from Supabase
This commit is contained in:
commit
ee38273ff9
|
@ -4,16 +4,16 @@ import { RateLimiterMode } from "../../../src/types";
|
|||
import { getScrapeQueue } from "../../../src/services/queue-service";
|
||||
import { Logger } from "../../../src/lib/logger";
|
||||
import { getCrawl, getCrawlJobs } from "../../../src/lib/crawl-redis";
|
||||
import { supabaseGetJobsById } from "../../../src/lib/supabase-jobs";
|
||||
import { supabaseGetJobsByCrawlId } from "../../../src/lib/supabase-jobs";
|
||||
import * as Sentry from "@sentry/node";
|
||||
import { configDotenv } from "dotenv";
|
||||
configDotenv();
|
||||
|
||||
export async function getJobs(ids: string[]) {
|
||||
export async function getJobs(crawlId: string, ids: string[]) {
|
||||
const jobs = (await Promise.all(ids.map(x => getScrapeQueue().getJob(x)))).filter(x => x);
|
||||
|
||||
if (process.env.USE_DB_AUTHENTICATION === "true") {
|
||||
const supabaseData = await supabaseGetJobsById(ids);
|
||||
const supabaseData = await supabaseGetJobsByCrawlId(crawlId);
|
||||
|
||||
supabaseData.forEach(x => {
|
||||
const job = jobs.find(y => y.id === x.job_id);
|
||||
|
@ -52,7 +52,7 @@ export async function crawlStatusController(req: Request, res: Response) {
|
|||
|
||||
const jobIDs = await getCrawlJobs(req.params.jobId);
|
||||
|
||||
const jobs = (await getJobs(jobIDs)).sort((a, b) => a.timestamp - b.timestamp);
|
||||
const jobs = (await getJobs(req.params.jobId, jobIDs)).sort((a, b) => a.timestamp - b.timestamp);
|
||||
const jobStatuses = await Promise.all(jobs.map(x => x.getState()));
|
||||
const jobStatus = sc.cancelled ? "failed" : jobStatuses.every(x => x === "completed") ? "completed" : jobStatuses.some(x => x === "failed") ? "failed" : "active";
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@ export async function crawlJobStatusPreviewController(req: Request, res: Respons
|
|||
// }
|
||||
// }
|
||||
|
||||
const jobs = (await getJobs(jobIDs)).sort((a, b) => a.timestamp - b.timestamp);
|
||||
const jobs = (await getJobs(req.params.jobId, jobIDs)).sort((a, b) => a.timestamp - b.timestamp);
|
||||
const jobStatuses = await Promise.all(jobs.map(x => x.getState()));
|
||||
const jobStatus = sc.cancelled ? "failed" : jobStatuses.every(x => x === "completed") ? "completed" : jobStatuses.some(x => x === "failed") ? "failed" : "active";
|
||||
|
||||
|
|
|
@ -2,6 +2,11 @@ import { supabase_service } from "../services/supabase";
|
|||
import { Logger } from "./logger";
|
||||
import * as Sentry from "@sentry/node";
|
||||
|
||||
/**
|
||||
* Get a single firecrawl_job by ID
|
||||
* @param jobId ID of Job
|
||||
* @returns {any | null} Job
|
||||
*/
|
||||
export const supabaseGetJobById = async (jobId: string) => {
|
||||
const { data, error } = await supabase_service
|
||||
.from("firecrawl_jobs")
|
||||
|
@ -20,13 +25,43 @@ export const supabaseGetJobById = async (jobId: string) => {
|
|||
return data;
|
||||
};
|
||||
|
||||
/**
|
||||
 * Get multiple firecrawl_jobs by ID. Use this only when requesting a small number of jobs (fewer than about 50) at once.
|
||||
* @param jobIds IDs of Jobs
|
||||
* @returns {any[]} Jobs
|
||||
*/
|
||||
export const supabaseGetJobsById = async (jobIds: string[]) => {
|
||||
const { data, error } = await supabase_service.rpc("get_jobs_by_ids", {
|
||||
job_ids: jobIds,
|
||||
});
|
||||
const { data, error } = await supabase_service
|
||||
.from("firecrawl_jobs")
|
||||
.select()
|
||||
.in("job_id", jobIds);
|
||||
|
||||
if (error) {
|
||||
Logger.error(`Error in get_jobs_by_ids: ${error}`);
|
||||
Logger.error(`Error in supabaseGetJobsById: ${error}`);
|
||||
Sentry.captureException(error);
|
||||
return [];
|
||||
}
|
||||
|
||||
if (!data) {
|
||||
return [];
|
||||
}
|
||||
|
||||
return data;
|
||||
};
|
||||
|
||||
/**
|
||||
 * Get multiple firecrawl_jobs by crawl ID. Use this instead of supabaseGetJobsById when you need a lot (50+) of jobs at once.
|
||||
* @param crawlId ID of crawl
|
||||
* @returns {any[]} Jobs
|
||||
*/
|
||||
export const supabaseGetJobsByCrawlId = async (crawlId: string) => {
|
||||
const { data, error } = await supabase_service
|
||||
.from("firecrawl_jobs")
|
||||
.select()
|
||||
.eq("crawl_id", crawlId)
|
||||
|
||||
if (error) {
|
||||
Logger.error(`Error in supabaseGetJobsByCrawlId: ${error}`);
|
||||
Sentry.captureException(error);
|
||||
return [];
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user