mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 11:42:24 +08:00
Merge pull request #301 from mendableai/bugfix/issue-291
[Bug] Fixed includeHTML to use cleanedHtml as response
This commit is contained in:
commit
32dde257a5
|
@@ -12,6 +12,7 @@
|
|||
"build": "tsc",
|
||||
"test": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_noAuth/*'",
|
||||
"test:local-no-auth": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_withAuth/*'",
|
||||
"test:full": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='(src/__tests__/e2e_noAuth|src/__tests__/e2e_withAuth)'",
|
||||
"test:prod": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='(src/__tests__/e2e_noAuth|src/__tests__/e2e_full_withAuth)'",
|
||||
"workers": "nodemon --exec ts-node src/services/queue-worker.ts",
|
||||
"worker:production": "node dist/src/services/queue-worker.js",
|
||||
|
|
|
@@ -401,7 +401,7 @@ export async function scrapSingleUrl(
|
|||
|
||||
return {
|
||||
text: await parseMarkdown(cleanedHtml),
|
||||
- html: scraperResponse.text,
|
||||
+ html: cleanedHtml,
|
||||
screenshot: scraperResponse.screenshot,
|
||||
pageStatusCode: scraperResponse.metadata.pageStatusCode,
|
||||
pageError: scraperResponse.metadata.pageError || undefined
|
||||
|
@@ -428,7 +428,7 @@ export async function scrapSingleUrl(
|
|||
if (existingHtml && existingHtml.trim().length >= 100) {
|
||||
let cleanedHtml = removeUnwantedElements(existingHtml, pageOptions);
|
||||
text = await parseMarkdown(cleanedHtml);
|
||||
- html = existingHtml;
|
||||
+ html = cleanedHtml;
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user