diff --git a/apps/api/src/controllers/v0/scrape.ts b/apps/api/src/controllers/v0/scrape.ts index 8cb09cf0..d2614d4d 100644 --- a/apps/api/src/controllers/v0/scrape.ts +++ b/apps/api/src/controllers/v0/scrape.ts @@ -63,6 +63,7 @@ export async function scrapeHelper( pageOptions, extractorOptions, origin: req.body.origin ?? defaultOrigin, + is_scrape: true, }, {}, jobId, @@ -179,12 +180,10 @@ export async function scrapeController(req: Request, res: Response) { typeof extractorOptions.extractionSchema !== "object" || extractorOptions.extractionSchema === null ) { - return res - .status(400) - .json({ - error: - "extractorOptions.extractionSchema must be an object if llm-extraction mode is specified", - }); + return res.status(400).json({ + error: + "extractorOptions.extractionSchema must be an object if llm-extraction mode is specified", + }); } pageOptions.onlyMainContent = true; @@ -202,12 +201,10 @@ export async function scrapeController(req: Request, res: Response) { } catch (error) { Logger.error(error); earlyReturn = true; - return res - .status(500) - .json({ - error: - "Error checking team credits. Please contact hello@firecrawl.com for help.", - }); + return res.status(500).json({ + error: + "Error checking team credits. Please contact hello@firecrawl.com for help.", + }); } const jobId = uuidv4(); @@ -231,8 +228,8 @@ export async function scrapeController(req: Request, res: Response) { : 0; if (result.success) { - let creditsToBeBilled = 0; // billing for doc done on queue end - const creditsPerLLMExtract = 50; + let creditsToBeBilled = 1; + const creditsPerLLMExtract = 49; if (extractorOptions.mode.includes("llm-extraction")) { // creditsToBeBilled = creditsToBeBilled + (creditsPerLLMExtract * filteredDocs.length); @@ -245,13 +242,16 @@ export async function scrapeController(req: Request, res: Response) { // Don't bill if we're early returning return; } - const billingResult = await billTeam(team_id, creditsToBeBilled); - if (!billingResult.success) { - return res.status(402).json({ - success: false, - error: - "Failed to bill team. Insufficient credits or subscription not found.", - }); + if (creditsToBeBilled > 0) { + // billing for doc done on queue end, bill only for llm extraction + const billingResult = await billTeam(team_id, creditsToBeBilled); + if (!billingResult.success) { + return res.status(402).json({ + success: false, + error: + "Failed to bill team. Insufficient credits or subscription not found.", + }); + } } } @@ -276,13 +276,11 @@ export async function scrapeController(req: Request, res: Response) { } catch (error) { Sentry.captureException(error); Logger.error(error); - return res - .status(500) - .json({ - error: - typeof error === "string" - ? error - : error?.message ?? "Internal Server Error", - }); + return res.status(500).json({ + error: + typeof error === "string" + ? error + : error?.message ?? "Internal Server Error", + }); } } diff --git a/apps/api/src/controllers/v1/scrape.ts b/apps/api/src/controllers/v1/scrape.ts index 940296bf..b68bda2d 100644 --- a/apps/api/src/controllers/v1/scrape.ts +++ b/apps/api/src/controllers/v1/scrape.ts @@ -29,6 +29,7 @@ export async function scrapeController(req: RequestWithAuth<{}, ScrapeResponse, pageOptions, extractorOptions: {}, origin: req.body.origin, + is_scrape: true, }, {}, jobId, jobPriority); let doc: any | undefined; diff --git a/apps/api/src/main/runWebScraper.ts b/apps/api/src/main/runWebScraper.ts index 84d6e99a..f78d4694 100644 --- a/apps/api/src/main/runWebScraper.ts +++ b/apps/api/src/main/runWebScraper.ts @@ -57,6 +57,7 @@ export async function startWebScraperPipeline({ team_id: job.data.team_id, bull_job_id: job.id.toString(), priority: job.opts.priority, + is_scrape: job.data.is_scrape ?? false, })) as { success: boolean; message: string; docs: Document[] }; } export async function runWebScraper({ @@ -71,6 +72,7 @@ export async function runWebScraper({ team_id, bull_job_id, priority, + is_scrape=false, }: RunWebScraperParams): Promise { try { const provider = new WebScraperDataProvider(); @@ -117,18 +119,21 @@ export async function runWebScraper({ } }) : docs; - - const billingResult = await billTeam(team_id, filteredDocs.length); - if (!billingResult.success) { - // throw new Error("Failed to bill team, no subscription was found"); - return { - success: false, - message: "Failed to bill team, no subscription was found", - docs: [], - }; + if(is_scrape === false) { + const billingResult = await billTeam(team_id, filteredDocs.length); + if (!billingResult.success) { + // throw new Error("Failed to bill team, no subscription was found"); + return { + success: false, + message: "Failed to bill team, no subscription was found", + docs: [], + }; + } } + + // This is where the returnvalue from the job is set onSuccess(filteredDocs, mode); diff --git a/apps/api/src/types.ts b/apps/api/src/types.ts index c57969f2..431c0126 100644 --- a/apps/api/src/types.ts +++ b/apps/api/src/types.ts @@ -32,6 +32,7 @@ export interface WebScraperOptions { sitemapped?: boolean; webhook?: string; v1?: boolean; + is_scrape?: boolean; } export interface RunWebScraperParams { @@ -46,6 +47,7 @@ export interface RunWebScraperParams { team_id: string; bull_job_id: string; priority?: number; + is_scrape?: boolean; } export interface RunWebScraperResult { diff --git a/apps/js-sdk/firecrawl/types/index.d.ts b/apps/js-sdk/firecrawl/types/index.d.ts index bf720896..e785cfd4 100644 --- a/apps/js-sdk/firecrawl/types/index.d.ts +++ b/apps/js-sdk/firecrawl/types/index.d.ts @@ -153,16 +153,14 @@ export interface ScrapeResponseV0 { * Includes options for both scraping and mapping during a crawl. */ export interface CrawlParams { + includePaths?: string[]; + excludePaths?: string[]; + maxDepth?: number; + limit?: number; + allowBackwardLinks?: boolean; + allowExternalLinks?: boolean; + ignoreSitemap?: boolean; scrapeOptions?: ScrapeParams; - crawlerOptions?: { - includePaths?: string[]; - excludePaths?: string[]; - maxDepth?: number; - limit?: number; - allowBackwardLinks?: boolean; - allowExternalLinks?: boolean; - ignoreSitemap?: boolean; - }; } /** * Parameters for crawling operations on v0.