Nick: weird activity block

This commit is contained in:
Nicolas 2024-08-16 22:01:56 -04:00
parent c281fe62c0
commit 47123be783
2 changed files with 9 additions and 3 deletions

View File

@@ -15,6 +15,7 @@ const socialMediaBlocklist = [
'whatsapp.com', 'whatsapp.com',
'wechat.com', 'wechat.com',
'telegram.org', 'telegram.org',
'researchhub.com'
]; ];
const allowedKeywords = [ const allowedKeywords = [

View File

@@ -173,9 +173,14 @@ async function processJob(job: Job, token: string) {
if (!job.data.sitemapped) { if (!job.data.sitemapped) {
if (!sc.cancelled) { if (!sc.cancelled) {
const crawler = crawlToCrawler(job.data.crawl_id, sc); const crawler = crawlToCrawler(job.data.crawl_id, sc);
let linksOnPage = [];
const links = crawler.filterLinks((data.docs[0].linksOnPage ?? []) try{
.map(href => crawler.filterURL(href.trim(), sc.originUrl)) linksOnPage = data.docs[0]?.linksOnPage ?? [];
}catch(e){
linksOnPage = []
}
const links = crawler.filterLinks(
linksOnPage.map(href => crawler.filterURL(href.trim(), sc.originUrl))
.filter(x => x !== null), .filter(x => x !== null),
Infinity, Infinity,
sc.crawlerOptions?.maxDepth ?? 10 sc.crawlerOptions?.maxDepth ?? 10