Merge pull request #449 from mendableai/bugfix/malformed-url-sitemap

Added regex for links in sitemap
This commit is contained in:
Nicolas 2024-07-24 20:37:35 -04:00 committed by GitHub
commit 2c1221750b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -64,7 +64,7 @@ export class WebCrawler {
private filterLinks(sitemapLinks: string[], limit: number, maxDepth: number): string[] {
return sitemapLinks
.filter((link) => {
-const url = new URL(link);
+const url = new URL(link.trim(), this.baseUrl);
const path = url.pathname;
const depth = getURLDepth(url.toString());