mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 11:42:24 +08:00
Fixed includeHTML to use cleanedHtml as response
This commit is contained in:
parent
1c5a1dd487
commit
9c539e9113
|
@@ -12,6 +12,7 @@
|
||||||
"build": "tsc",
|
"build": "tsc",
|
||||||
"test": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_noAuth/*'",
|
"test": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_noAuth/*'",
|
||||||
"test:local-no-auth": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_withAuth/*'",
|
"test:local-no-auth": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_withAuth/*'",
|
||||||
|
"test:full": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='(src/__tests__/e2e_noAuth|src/__tests__/e2e_withAuth)'",
|
||||||
"test:prod": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='(src/__tests__/e2e_noAuth|src/__tests__/e2e_full_withAuth)'",
|
"test:prod": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='(src/__tests__/e2e_noAuth|src/__tests__/e2e_full_withAuth)'",
|
||||||
"workers": "nodemon --exec ts-node src/services/queue-worker.ts",
|
"workers": "nodemon --exec ts-node src/services/queue-worker.ts",
|
||||||
"worker:production": "node dist/src/services/queue-worker.js",
|
"worker:production": "node dist/src/services/queue-worker.js",
|
||||||
|
|
|
@@ -401,7 +401,7 @@ export async function scrapSingleUrl(
|
||||||
|
|
||||||
return {
|
return {
|
||||||
text: await parseMarkdown(cleanedHtml),
|
text: await parseMarkdown(cleanedHtml),
|
||||||
html: scraperResponse.text,
|
html: cleanedHtml,
|
||||||
screenshot: scraperResponse.screenshot,
|
screenshot: scraperResponse.screenshot,
|
||||||
pageStatusCode: scraperResponse.metadata.pageStatusCode,
|
pageStatusCode: scraperResponse.metadata.pageStatusCode,
|
||||||
pageError: scraperResponse.metadata.pageError || undefined
|
pageError: scraperResponse.metadata.pageError || undefined
|
||||||
|
@@ -428,7 +428,7 @@ export async function scrapSingleUrl(
|
||||||
if (existingHtml && existingHtml.trim().length >= 100) {
|
if (existingHtml && existingHtml.trim().length >= 100) {
|
||||||
let cleanedHtml = removeUnwantedElements(existingHtml, pageOptions);
|
let cleanedHtml = removeUnwantedElements(existingHtml, pageOptions);
|
||||||
text = await parseMarkdown(cleanedHtml);
|
text = await parseMarkdown(cleanedHtml);
|
||||||
html = existingHtml;
|
html = cleanedHtml;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user