mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 11:42:24 +08:00
Merge branch 'main' of https://github.com/mendableai/firecrawl
This commit is contained in:
commit
1074e976cd
|
@ -30,7 +30,14 @@ export const url = z.preprocess(
|
|||
"URL must have a valid top-level domain or be a valid path"
|
||||
)
|
||||
.refine(
|
||||
(x) => checkUrl(x as string),
|
||||
(x) => {
|
||||
try {
|
||||
checkUrl(x as string)
|
||||
return true;
|
||||
} catch (_) {
|
||||
return false;
|
||||
}
|
||||
},
|
||||
"Invalid URL"
|
||||
)
|
||||
.refine(
|
||||
|
|
|
@ -201,16 +201,20 @@ if (cluster.isMaster) {
|
|||
Sentry.setupExpressErrorHandler(app);
|
||||
|
||||
app.use((err: unknown, req: Request<{}, ErrorResponse, undefined>, res: ResponseWithSentry<ErrorResponse>, next: NextFunction) => {
|
||||
if (err instanceof SyntaxError && 'status' in err && err.status === 400 && 'body' in err) {
|
||||
return res.status(400).json({ success: false, error: 'Bad request, malformed JSON' });
|
||||
}
|
||||
|
||||
const id = res.sentry ?? uuidv4();
|
||||
let verbose = JSON.stringify(err);
|
||||
if (verbose === "{}") {
|
||||
if (err instanceof Error) {
|
||||
verbose = JSON.stringify({
|
||||
message: err.message,
|
||||
name: err.name,
|
||||
stack: err.stack,
|
||||
});
|
||||
}
|
||||
if (err instanceof Error) {
|
||||
verbose = JSON.stringify({
|
||||
message: err.message,
|
||||
name: err.name,
|
||||
stack: err.stack,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Logger.error("Error occurred in request! (" + req.path + ") -- ID " + id + " -- " + verbose);
|
||||
|
|
|
@ -83,7 +83,7 @@ function idempotencyMiddleware(req: Request, res: Response, next: NextFunction)
|
|||
}
|
||||
|
||||
function blocklistMiddleware(req: Request, res: Response, next: NextFunction) {
|
||||
if (req.body.url && isUrlBlocked(req.body.url)) {
|
||||
if (typeof req.body.url === "string" && isUrlBlocked(req.body.url)) {
|
||||
if (!res.headersSent) {
|
||||
return res.status(403).json({ success: false, error: "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions." });
|
||||
}
|
||||
|
|
|
@ -55,7 +55,7 @@ export async function scrapWithFireEngine({
|
|||
try {
|
||||
const reqParams = await generateRequestParams(url);
|
||||
let waitParam = reqParams["params"]?.wait ?? waitFor;
|
||||
let engineParam = reqParams["params"]?.engine ?? reqParams["params"]?.fireEngineOptions?.engine ?? fireEngineOptions?.engine ?? "playwright";
|
||||
let engineParam = reqParams["params"]?.engine ?? reqParams["params"]?.fireEngineOptions?.engine ?? fireEngineOptions?.engine ?? "chrome-cdp";
|
||||
let screenshotParam = reqParams["params"]?.screenshot ?? screenshot;
|
||||
let fullPageScreenshotParam = reqParams["params"]?.fullPageScreenshot ?? fullPageScreenshot;
|
||||
let fireEngineOptionsParam : FireEngineOptions = reqParams["params"]?.fireEngineOptions ?? fireEngineOptions;
|
||||
|
|
|
@ -96,15 +96,15 @@ function getScrapingFallbackOrder(
|
|||
"fetch",
|
||||
].filter(Boolean);
|
||||
|
||||
if (isWaitPresent || isScreenshotPresent || isHeadersPresent) {
|
||||
defaultOrder = [
|
||||
"fire-engine",
|
||||
useFireEngine ? undefined : "playwright",
|
||||
...defaultOrder.filter(
|
||||
(scraper) => scraper !== "fire-engine" && scraper !== "playwright"
|
||||
),
|
||||
].filter(Boolean);
|
||||
}
|
||||
// if (isWaitPresent || isScreenshotPresent || isHeadersPresent) {
|
||||
// defaultOrder = [
|
||||
// "fire-engine",
|
||||
// useFireEngine ? undefined : "playwright",
|
||||
// ...defaultOrder.filter(
|
||||
// (scraper) => scraper !== "fire-engine" && scraper !== "playwright"
|
||||
// ),
|
||||
// ].filter(Boolean);
|
||||
// }
|
||||
|
||||
const filteredDefaultOrder = defaultOrder.filter(
|
||||
(scraper: (typeof baseScrapers)[number]) =>
|
||||
|
|
|
@ -6,7 +6,7 @@ const RATE_LIMITS = {
|
|||
crawl: {
|
||||
default: 3,
|
||||
free: 2,
|
||||
starter: 3,
|
||||
starter: 10,
|
||||
standard: 5,
|
||||
standardOld: 40,
|
||||
scale: 50,
|
||||
|
@ -19,9 +19,9 @@ const RATE_LIMITS = {
|
|||
scrape: {
|
||||
default: 20,
|
||||
free: 10,
|
||||
starter: 20,
|
||||
starter: 100,
|
||||
standard: 100,
|
||||
standardOld: 40,
|
||||
standardOld: 100,
|
||||
scale: 500,
|
||||
hobby: 20,
|
||||
standardNew: 100,
|
||||
|
@ -32,8 +32,8 @@ const RATE_LIMITS = {
|
|||
search: {
|
||||
default: 20,
|
||||
free: 5,
|
||||
starter: 20,
|
||||
standard: 40,
|
||||
starter: 50,
|
||||
standard: 50,
|
||||
standardOld: 40,
|
||||
scale: 500,
|
||||
hobby: 10,
|
||||
|
@ -45,9 +45,9 @@ const RATE_LIMITS = {
|
|||
map:{
|
||||
default: 20,
|
||||
free: 5,
|
||||
starter: 20,
|
||||
standard: 40,
|
||||
standardOld: 40,
|
||||
starter: 50,
|
||||
standard: 50,
|
||||
standardOld: 50,
|
||||
scale: 500,
|
||||
hobby: 10,
|
||||
standardNew: 50,
|
||||
|
|
Loading…
Reference in New Issue
Block a user