mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 11:42:24 +08:00
Update fireEngine.ts
This commit is contained in:
parent
f494d2b707
commit
f98be7d94e
|
@ -136,27 +136,29 @@ export async function scrapWithFireEngine({
|
||||||
return { html: "", screenshot: "", pageStatusCode: null, pageError: "" };
|
return { html: "", screenshot: "", pageStatusCode: null, pageError: "" };
|
||||||
}
|
}
|
||||||
|
|
||||||
if (response.status !== 200) {
|
if (checkStatusResponse.status !== 200 || checkStatusResponse.data.error) {
|
||||||
Logger.debug(
|
Logger.debug(
|
||||||
`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}`
|
`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${checkStatusResponse.status}`
|
||||||
);
|
);
|
||||||
|
|
||||||
logParams.error_message = response.data?.pageError;
|
logParams.error_message = checkStatusResponse.data?.pageError ?? checkStatusResponse.data?.error;
|
||||||
logParams.response_code = response.data?.pageStatusCode;
|
logParams.response_code = checkStatusResponse.data?.pageStatusCode;
|
||||||
|
|
||||||
if(response.data && response.data?.pageStatusCode !== 200) {
|
if(checkStatusResponse.data && checkStatusResponse.data?.pageStatusCode !== 200) {
|
||||||
Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}`);
|
Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const pageStatusCode = checkStatusResponse.data?.pageStatusCode ? checkStatusResponse.data?.pageStatusCode : checkStatusResponse.data?.error && checkStatusResponse.data?.error.includes("Dns resolution error for hostname") ? 404 : undefined;
|
||||||
|
|
||||||
return {
|
return {
|
||||||
html: "",
|
html: "",
|
||||||
screenshot: "",
|
screenshot: "",
|
||||||
pageStatusCode: response.data?.pageStatusCode,
|
pageStatusCode,
|
||||||
pageError: response.data?.pageError,
|
pageError: checkStatusResponse.data?.pageError ?? checkStatusResponse.data?.error,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
const contentType = response.headers["content-type"];
|
const contentType = checkStatusResponse.headers["content-type"];
|
||||||
if (contentType && contentType.includes("application/pdf")) {
|
if (contentType && contentType.includes("application/pdf")) {
|
||||||
const { content, pageStatusCode, pageError } = await fetchAndProcessPdf(
|
const { content, pageStatusCode, pageError } = await fetchAndProcessPdf(
|
||||||
url,
|
url,
|
||||||
|
@ -167,18 +169,18 @@ export async function scrapWithFireEngine({
|
||||||
logParams.error_message = pageError;
|
logParams.error_message = pageError;
|
||||||
return { html: content, screenshot: "", pageStatusCode, pageError };
|
return { html: content, screenshot: "", pageStatusCode, pageError };
|
||||||
} else {
|
} else {
|
||||||
const data = response.data;
|
const data = checkStatusResponse.data;
|
||||||
logParams.success =
|
logParams.success =
|
||||||
(data.pageStatusCode >= 200 && data.pageStatusCode < 300) ||
|
(data.pageStatusCode >= 200 && data.pageStatusCode < 300) ||
|
||||||
data.pageStatusCode === 404;
|
data.pageStatusCode === 404;
|
||||||
logParams.html = data.content ?? "";
|
logParams.html = data.content ?? "";
|
||||||
logParams.response_code = data.pageStatusCode;
|
logParams.response_code = data.pageStatusCode;
|
||||||
logParams.error_message = data.pageError;
|
logParams.error_message = data.pageError ?? data.error;
|
||||||
return {
|
return {
|
||||||
html: data.content ?? "",
|
html: data.content ?? "",
|
||||||
screenshot: data.screenshot ?? "",
|
screenshot: data.screenshot ?? "",
|
||||||
pageStatusCode: data.pageStatusCode,
|
pageStatusCode: data.pageStatusCode,
|
||||||
pageError: data.pageError,
|
pageError: data.pageError ?? data.error,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
|
Loading…
Reference in New Issue
Block a user