mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 11:42:24 +08:00
Nick: weird activity block
This commit is contained in:
parent
c281fe62c0
commit
47123be783
|
@@ -15,6 +15,7 @@ const socialMediaBlocklist = [
|
|||
'whatsapp.com',
|
||||
'wechat.com',
|
||||
'telegram.org',
|
||||
'researchhub.com'
|
||||
];
|
||||
|
||||
const allowedKeywords = [
|
||||
|
|
|
@@ -173,9 +173,14 @@ async function processJob(job: Job, token: string) {
|
|||
if (!job.data.sitemapped) {
|
||||
if (!sc.cancelled) {
|
||||
const crawler = crawlToCrawler(job.data.crawl_id, sc);
|
||||
|
||||
const links = crawler.filterLinks((data.docs[0].linksOnPage ?? [])
|
||||
.map(href => crawler.filterURL(href.trim(), sc.originUrl))
|
||||
let linksOnPage = [];
|
||||
try{
|
||||
linksOnPage = data.docs[0]?.linksOnPage ?? [];
|
||||
}catch(e){
|
||||
linksOnPage = []
|
||||
}
|
||||
const links = crawler.filterLinks(
|
||||
linksOnPage.map(href => crawler.filterURL(href.trim(), sc.originUrl))
|
||||
.filter(x => x !== null),
|
||||
Infinity,
|
||||
sc.crawlerOptions?.maxDepth ?? 10
|
||||
|
|
Loading…
Reference in New Issue
Block a user