diff --git a/apps/api/src/controllers/v0/crawl-status.ts b/apps/api/src/controllers/v0/crawl-status.ts index a3f3f16f..41491f86 100644 --- a/apps/api/src/controllers/v0/crawl-status.ts +++ b/apps/api/src/controllers/v0/crawl-status.ts @@ -4,16 +4,16 @@ import { RateLimiterMode } from "../../../src/types"; import { getScrapeQueue } from "../../../src/services/queue-service"; import { Logger } from "../../../src/lib/logger"; import { getCrawl, getCrawlJobs } from "../../../src/lib/crawl-redis"; -import { supabaseGetJobsById } from "../../../src/lib/supabase-jobs"; +import { supabaseGetJobsByCrawlId } from "../../../src/lib/supabase-jobs"; import * as Sentry from "@sentry/node"; import { configDotenv } from "dotenv"; configDotenv(); -export async function getJobs(ids: string[]) { +export async function getJobs(crawlId: string, ids: string[]) { const jobs = (await Promise.all(ids.map(x => getScrapeQueue().getJob(x)))).filter(x => x); if (process.env.USE_DB_AUTHENTICATION === "true") { - const supabaseData = await supabaseGetJobsById(ids); + const supabaseData = await supabaseGetJobsByCrawlId(crawlId); supabaseData.forEach(x => { const job = jobs.find(y => y.id === x.job_id); @@ -52,7 +52,7 @@ export async function crawlStatusController(req: Request, res: Response) { const jobIDs = await getCrawlJobs(req.params.jobId); - const jobs = (await getJobs(jobIDs)).sort((a, b) => a.timestamp - b.timestamp); + const jobs = (await getJobs(req.params.jobId, jobIDs)).sort((a, b) => a.timestamp - b.timestamp); const jobStatuses = await Promise.all(jobs.map(x => x.getState())); const jobStatus = sc.cancelled ? "failed" : jobStatuses.every(x => x === "completed") ? "completed" : jobStatuses.some(x => x === "failed") ? "failed" : "active"; diff --git a/apps/api/src/controllers/v0/status.ts b/apps/api/src/controllers/v0/status.ts index 34ebb3c6..bf8d2834 100644 --- a/apps/api/src/controllers/v0/status.ts +++ b/apps/api/src/controllers/v0/status.ts @@ -22,7 +22,7 @@ export async function crawlJobStatusPreviewController(req: Request, res: Respons // } // } - const jobs = (await getJobs(jobIDs)).sort((a, b) => a.timestamp - b.timestamp); + const jobs = (await getJobs(req.params.jobId, jobIDs)).sort((a, b) => a.timestamp - b.timestamp); const jobStatuses = await Promise.all(jobs.map(x => x.getState())); const jobStatus = sc.cancelled ? "failed" : jobStatuses.every(x => x === "completed") ? "completed" : jobStatuses.some(x => x === "failed") ? "failed" : "active"; diff --git a/apps/api/src/lib/supabase-jobs.ts b/apps/api/src/lib/supabase-jobs.ts index cda6fd46..52e594c4 100644 --- a/apps/api/src/lib/supabase-jobs.ts +++ b/apps/api/src/lib/supabase-jobs.ts @@ -2,6 +2,11 @@ import { supabase_service } from "../services/supabase"; import { Logger } from "./logger"; import * as Sentry from "@sentry/node"; +/** + * Get a single firecrawl_job by ID + * @param jobId ID of Job + * @returns {any | null} Job + */ export const supabaseGetJobById = async (jobId: string) => { const { data, error } = await supabase_service .from("firecrawl_jobs") @@ -20,6 +25,11 @@ export const supabaseGetJobById = async (jobId: string) => { return data; }; +/** + * Get multiple firecrawl_jobs by ID. Use this if you're not requesting a lot (50+) of jobs at once. + * @param jobIds IDs of Jobs + * @returns {any[]} Jobs + */ export const supabaseGetJobsById = async (jobIds: string[]) => { const { data, error } = await supabase_service.rpc("get_jobs_by_ids", { job_ids: jobIds, @@ -38,6 +48,30 @@ export const supabaseGetJobsById = async (jobIds: string[]) => { return data; }; +/** + * Get multiple firecrawl_jobs by crawl ID. Use this if you need a lot of jobs at once. + * @param crawlId ID of crawl + * @returns {any[]} Jobs + */ +export const supabaseGetJobsByCrawlId = async (crawlId: string) => { + const { data, error } = await supabase_service + .from("firecrawl_jobs") + .select() + .eq("crawl_id", crawlId) + + if (error) { + Logger.error(`Error in supabaseGetJobsByCrawlId: ${error}`); + Sentry.captureException(error); + return []; + } + + if (!data) { + return []; + } + + return data; +}; + export const supabaseGetJobByIdOnlyData = async (jobId: string) => { const { data, error } = await supabase_service