This commit is contained in:
rafaelsideguide 2024-09-10 09:09:43 -03:00
commit 1074e976cd
6 changed files with 38 additions and 27 deletions

View File

@@ -30,7 +30,14 @@ export const url = z.preprocess(
"URL must have a valid top-level domain or be a valid path"
)
.refine(
(x) => checkUrl(x as string),
(x) => {
try {
checkUrl(x as string)
return true;
} catch (_) {
return false;
}
},
"Invalid URL"
)
.refine(

View File

@@ -201,16 +201,20 @@ if (cluster.isMaster) {
Sentry.setupExpressErrorHandler(app);
app.use((err: unknown, req: Request<{}, ErrorResponse, undefined>, res: ResponseWithSentry<ErrorResponse>, next: NextFunction) => {
if (err instanceof SyntaxError && 'status' in err && err.status === 400 && 'body' in err) {
return res.status(400).json({ success: false, error: 'Bad request, malformed JSON' });
}
const id = res.sentry ?? uuidv4();
let verbose = JSON.stringify(err);
if (verbose === "{}") {
if (err instanceof Error) {
verbose = JSON.stringify({
message: err.message,
name: err.name,
stack: err.stack,
});
}
if (err instanceof Error) {
verbose = JSON.stringify({
message: err.message,
name: err.name,
stack: err.stack,
});
}
}
Logger.error("Error occurred in request! (" + req.path + ") -- ID " + id + " -- " + verbose);

View File

@@ -83,7 +83,7 @@ function idempotencyMiddleware(req: Request, res: Response, next: NextFunction)
}
function blocklistMiddleware(req: Request, res: Response, next: NextFunction) {
if (req.body.url && isUrlBlocked(req.body.url)) {
if (typeof req.body.url === "string" && isUrlBlocked(req.body.url)) {
if (!res.headersSent) {
return res.status(403).json({ success: false, error: "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions." });
}

View File

@@ -55,7 +55,7 @@ export async function scrapWithFireEngine({
try {
const reqParams = await generateRequestParams(url);
let waitParam = reqParams["params"]?.wait ?? waitFor;
let engineParam = reqParams["params"]?.engine ?? reqParams["params"]?.fireEngineOptions?.engine ?? fireEngineOptions?.engine ?? "playwright";
let engineParam = reqParams["params"]?.engine ?? reqParams["params"]?.fireEngineOptions?.engine ?? fireEngineOptions?.engine ?? "chrome-cdp";
let screenshotParam = reqParams["params"]?.screenshot ?? screenshot;
let fullPageScreenshotParam = reqParams["params"]?.fullPageScreenshot ?? fullPageScreenshot;
let fireEngineOptionsParam : FireEngineOptions = reqParams["params"]?.fireEngineOptions ?? fireEngineOptions;

View File

@@ -96,15 +96,15 @@ function getScrapingFallbackOrder(
"fetch",
].filter(Boolean);
if (isWaitPresent || isScreenshotPresent || isHeadersPresent) {
defaultOrder = [
"fire-engine",
useFireEngine ? undefined : "playwright",
...defaultOrder.filter(
(scraper) => scraper !== "fire-engine" && scraper !== "playwright"
),
].filter(Boolean);
}
// if (isWaitPresent || isScreenshotPresent || isHeadersPresent) {
// defaultOrder = [
// "fire-engine",
// useFireEngine ? undefined : "playwright",
// ...defaultOrder.filter(
// (scraper) => scraper !== "fire-engine" && scraper !== "playwright"
// ),
// ].filter(Boolean);
// }
const filteredDefaultOrder = defaultOrder.filter(
(scraper: (typeof baseScrapers)[number]) =>

View File

@@ -6,7 +6,7 @@ const RATE_LIMITS = {
crawl: {
default: 3,
free: 2,
starter: 3,
starter: 10,
standard: 5,
standardOld: 40,
scale: 50,
@@ -19,9 +19,9 @@ const RATE_LIMITS = {
scrape: {
default: 20,
free: 10,
starter: 20,
starter: 100,
standard: 100,
standardOld: 40,
standardOld: 100,
scale: 500,
hobby: 20,
standardNew: 100,
@@ -32,8 +32,8 @@ const RATE_LIMITS = {
search: {
default: 20,
free: 5,
starter: 20,
standard: 40,
starter: 50,
standard: 50,
standardOld: 40,
scale: 500,
hobby: 10,
@@ -45,9 +45,9 @@ const RATE_LIMITS = {
map:{
default: 20,
free: 5,
starter: 20,
standard: 40,
standardOld: 40,
starter: 50,
standard: 50,
standardOld: 50,
scale: 500,
hobby: 10,
standardNew: 50,