Merge pull request #301 from mendableai/bugfix/issue-291

[Bug] Fixed includeHTML to use cleanedHtml as response
This commit is contained in:
Nicolas 2024-06-18 16:26:55 -04:00 committed by GitHub
commit 32dde257a5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 3 additions and 2 deletions

View File

@ -12,6 +12,7 @@
"build": "tsc",
"test": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_noAuth/*'",
"test:local-no-auth": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_withAuth/*'",
"test:full": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='(src/__tests__/e2e_noAuth|src/__tests__/e2e_withAuth)'",
"test:prod": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='(src/__tests__/e2e_noAuth|src/__tests__/e2e_full_withAuth)'",
"workers": "nodemon --exec ts-node src/services/queue-worker.ts",
"worker:production": "node dist/src/services/queue-worker.js",

View File

@ -401,7 +401,7 @@ export async function scrapSingleUrl(
return {
text: await parseMarkdown(cleanedHtml),
html: scraperResponse.text,
html: cleanedHtml,
screenshot: scraperResponse.screenshot,
pageStatusCode: scraperResponse.metadata.pageStatusCode,
pageError: scraperResponse.metadata.pageError || undefined
@ -428,7 +428,7 @@ export async function scrapSingleUrl(
if (existingHtml && existingHtml.trim().length >= 100) {
let cleanedHtml = removeUnwantedElements(existingHtml, pageOptions);
text = await parseMarkdown(cleanedHtml);
html = existingHtml;
html = cleanedHtml;
break;
}