Nick: weird activity block

This commit is contained in:
Nicolas 2024-08-16 22:01:56 -04:00
parent c281fe62c0
commit 47123be783
2 changed files with 9 additions and 3 deletions

View File

@@ -15,6 +15,7 @@ const socialMediaBlocklist = [
'whatsapp.com',
'wechat.com',
'telegram.org',
'researchhub.com'
];
const allowedKeywords = [

View File

@@ -173,9 +173,14 @@ async function processJob(job: Job, token: string) {
if (!job.data.sitemapped) {
if (!sc.cancelled) {
const crawler = crawlToCrawler(job.data.crawl_id, sc);
const links = crawler.filterLinks((data.docs[0].linksOnPage ?? [])
.map(href => crawler.filterURL(href.trim(), sc.originUrl))
let linksOnPage = [];
try{
linksOnPage = data.docs[0]?.linksOnPage ?? [];
}catch(e){
linksOnPage = []
}
const links = crawler.filterLinks(
linksOnPage.map(href => crawler.filterURL(href.trim(), sc.originUrl))
.filter(x => x !== null),
Infinity,
sc.crawlerOptions?.maxDepth ?? 10