diff --git a/apps/api/src/scraper/WebScraper/single_url.ts b/apps/api/src/scraper/WebScraper/single_url.ts
index dd4d9f71..0fce76b6 100644
--- a/apps/api/src/scraper/WebScraper/single_url.ts
+++ b/apps/api/src/scraper/WebScraper/single_url.ts
@@ -19,6 +19,7 @@ import { scrapWithScrapingBee } from "./scrapers/scrapingBee";
 import { extractLinks } from "./utils/utils";
 import { Logger } from "../../lib/logger";
 import { ScrapeEvents } from "../../lib/scrape-events";
+import { clientSideError } from "../../strings";
 
 dotenv.config();
 
@@ -311,7 +312,7 @@ export async function scrapSingleUrl(
 
     for (const scraper of scrapersInOrder) {
       // If exists text coming from crawler, use it
-      if (existingHtml && existingHtml.trim().length >= 100) {
+      if (existingHtml && existingHtml.trim().length >= 100 && !existingHtml.includes(clientSideError)) {
         let cleanedHtml = removeUnwantedElements(existingHtml, pageOptions);
         text = await parseMarkdown(cleanedHtml);
         html = cleanedHtml;
diff --git a/apps/api/src/strings.ts b/apps/api/src/strings.ts
index e7a6f21e..8edc57f1 100644
--- a/apps/api/src/strings.ts
+++ b/apps/api/src/strings.ts
@@ -1,2 +1,4 @@
 export const errorNoResults =
   "No results found, please check the URL or contact us at help@mendable.ai to file a ticket.";
+
+export const clientSideError = "client-side exception has occurred"
\ No newline at end of file