diff --git a/apps/api/src/controllers/crawl.ts b/apps/api/src/controllers/crawl.ts index 2f7f8426..17cfa625 100644 --- a/apps/api/src/controllers/crawl.ts +++ b/apps/api/src/controllers/crawl.ts @@ -42,6 +42,7 @@ export async function crawlController(req: Request, res: Response) { returnOnlyUrls: true, }, pageOptions: pageOptions, + }); const docs = await a.getDocuments(false, (progress) => { @@ -67,6 +68,7 @@ export async function crawlController(req: Request, res: Response) { crawlerOptions: { ...crawlerOptions }, team_id: team_id, pageOptions: pageOptions, + origin: req.body.origin ?? "api", }); res.json({ jobId: job.id }); diff --git a/apps/api/src/controllers/crawlPreview.ts b/apps/api/src/controllers/crawlPreview.ts index 641468c4..3f28ef60 100644 --- a/apps/api/src/controllers/crawlPreview.ts +++ b/apps/api/src/controllers/crawlPreview.ts @@ -21,12 +21,14 @@ export async function crawlPreviewController(req: Request, res: Response) { const mode = req.body.mode ?? "crawl"; const crawlerOptions = req.body.crawlerOptions ?? {}; const pageOptions = req.body.pageOptions ?? { onlyMainContent: false }; + const job = await addWebScraperJob({ url: url, mode: mode ?? "crawl", // fix for single urls not working crawlerOptions: { ...crawlerOptions, limit: 5, maxCrawledLinks: 5 }, team_id: "preview", pageOptions: pageOptions, + origin: "website-preview", }); res.json({ jobId: job.id }); diff --git a/apps/api/src/controllers/scrape.ts b/apps/api/src/controllers/scrape.ts index 51d14f2d..632fff59 100644 --- a/apps/api/src/controllers/scrape.ts +++ b/apps/api/src/controllers/scrape.ts @@ -72,6 +72,7 @@ export async function scrapeController(req: Request, res: Response) { } const crawlerOptions = req.body.crawlerOptions ?? {}; const pageOptions = req.body.pageOptions ?? { onlyMainContent: false }; + const origin = req.body.origin ?? "api"; try { const { success: creditsCheckSuccess, message: creditsCheckMessage } = @@ -83,24 +84,27 @@ export async function scrapeController(req: Request, res: Response) { console.error(error); return res.status(500).json({ error: "Internal server error" }); } - + const startTime = new Date().getTime(); const result = await scrapeHelper( req, team_id, crawlerOptions, pageOptions ); + const endTime = new Date().getTime(); + const timeTakenInSeconds = (endTime - startTime) / 1000; logJob({ success: result.success, message: result.error, num_docs: 1, docs: [result.data], - time_taken: 0, + time_taken: timeTakenInSeconds, team_id: team_id, mode: "scrape", url: req.body.url, crawlerOptions: crawlerOptions, pageOptions: pageOptions, + origin: origin, }); return res.status(result.returnCode).json(result); } catch (error) { diff --git a/apps/api/src/main/runWebScraper.ts b/apps/api/src/main/runWebScraper.ts index 0f562a05..d9434291 100644 --- a/apps/api/src/main/runWebScraper.ts +++ b/apps/api/src/main/runWebScraper.ts @@ -44,7 +44,11 @@ export async function runWebScraper({ onSuccess: (result: any) => void; onError: (error: any) => void; team_id: string; -}): Promise<{ success: boolean; message: string; docs: CrawlResult[] }> { +}): Promise<{ + success: boolean; + message: string; + docs: CrawlResult[]; +}> { try { const provider = new WebScraperDataProvider(); if (mode === "crawl") { @@ -70,7 +74,7 @@ export async function runWebScraper({ return { success: true, message: "No pages found", - docs: [], + docs: [] }; } @@ -87,7 +91,7 @@ export async function runWebScraper({ return { success: false, message: "Failed to bill team, no subscription was found", - docs: [], + docs: [] }; } diff --git a/apps/api/src/services/logging/log_job.ts b/apps/api/src/services/logging/log_job.ts index cb7e6487..639b3a8c 100644 --- a/apps/api/src/services/logging/log_job.ts +++ b/apps/api/src/services/logging/log_job.ts @@ -17,11 +17,12 @@ export async function logJob(job: FirecrawlJob) { num_docs: job.num_docs, docs: job.docs, time_taken: job.time_taken, - team_id: job.team_id, + team_id: job.team_id === "preview" ? null : job.team_id, mode: job.mode, url: job.url, crawler_options: job.crawlerOptions, page_options: job.pageOptions, + origin: job.origin, }, ]); if (error) { diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts index d4364012..8d7a7bd9 100644 --- a/apps/api/src/services/queue-worker.ts +++ b/apps/api/src/services/queue-worker.ts @@ -17,10 +17,11 @@ getWebScraperQueue().process( current_url: "", }); const start = Date.now(); + const { success, message, docs } = await startWebScraperPipeline({ job }); const end = Date.now(); const timeTakenInSeconds = (end - start) / 1000; - + const data = { success: success, result: { @@ -33,7 +34,7 @@ getWebScraperQueue().process( }; await callWebhook(job.data.team_id, data); - + await logJob({ success: success, message: message, @@ -45,6 +46,7 @@ getWebScraperQueue().process( url: job.data.url, crawlerOptions: job.data.crawlerOptions, pageOptions: job.data.pageOptions, + origin: job.data.origin, }); done(null, data); } catch (error) { diff --git a/apps/api/src/services/webhook.ts b/apps/api/src/services/webhook.ts index a086425c..ab1f90ea 100644 --- a/apps/api/src/services/webhook.ts +++ b/apps/api/src/services/webhook.ts @@ -1,6 +1,7 @@ import { supabase_service } from "./supabase"; export const callWebhook = async (teamId: string, data: any) => { + try { const { data: webhooksData, error } = await supabase_service .from('webhooks') .select('url') @@ -37,5 +38,9 @@ export const callWebhook = async (teamId: string, data: any) => { data: dataToSend, error: data.error || undefined, }), - }); -} \ No newline at end of file + }); + } catch (error) { + console.error(`Error sending webhook for team ID: ${teamId}`, error.message); + } +}; + diff --git a/apps/api/src/types.ts b/apps/api/src/types.ts index e3fc5dc7..f9e5c739 100644 --- a/apps/api/src/types.ts +++ b/apps/api/src/types.ts @@ -22,6 +22,7 @@ export interface WebScraperOptions { crawlerOptions: any; pageOptions: any; team_id: string; + origin?: string; } @@ -36,6 +37,7 @@ export interface FirecrawlJob { url: string; crawlerOptions?: any; pageOptions?: any; + origin: string; }