mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 11:42:24 +08:00
Merge pull request #869 from mendableai/fix/new-url-on-utils-extract-links
[BUG] Added trycatch and removed redundancy
This commit is contained in:
commit
ae5ba74e2d
|
@ -29,27 +29,28 @@ export function extractLinks(html: string, baseUrl: string): string[] {
|
|||
const $ = cheerio.load(html);
|
||||
const links: string[] = [];
|
||||
|
||||
// Parse the base URL to get the origin
|
||||
const urlObject = new URL(baseUrl);
|
||||
const origin = urlObject.origin;
|
||||
|
||||
$('a').each((_, element) => {
|
||||
const href = $(element).attr('href');
|
||||
if (href) {
|
||||
if (href.startsWith('http://') || href.startsWith('https://')) {
|
||||
// Absolute URL, add as is
|
||||
links.push(href);
|
||||
} else if (href.startsWith('/')) {
|
||||
// Relative URL starting with '/', append to origin
|
||||
links.push(new URL(href, baseUrl).href);
|
||||
} else if (!href.startsWith('#') && !href.startsWith('mailto:')) {
|
||||
// Relative URL not starting with '/', append to base URL
|
||||
links.push(new URL(href, baseUrl).href);
|
||||
} else if (href.startsWith('mailto:')) {
|
||||
// mailto: links, add as is
|
||||
links.push(href);
|
||||
try {
|
||||
if (href.startsWith('http://') || href.startsWith('https://')) {
|
||||
// Absolute URL, add as is
|
||||
links.push(href);
|
||||
} else if (href.startsWith('/')) {
|
||||
// Relative URL starting with '/', append to base URL
|
||||
links.push(new URL(href, baseUrl).href);
|
||||
} else if (!href.startsWith('#') && !href.startsWith('mailto:')) {
|
||||
// Relative URL not starting with '/', append to base URL
|
||||
links.push(new URL(href, baseUrl).href);
|
||||
} else if (href.startsWith('mailto:')) {
|
||||
// mailto: links, add as is
|
||||
links.push(href);
|
||||
}
|
||||
// Fragment-only links (#) are ignored
|
||||
} catch (error) {
|
||||
// Log the error and continue
|
||||
console.error(`Failed to construct URL for href: ${href} with base: ${baseUrl}`, error);
|
||||
}
|
||||
// Fragment-only links (#) are ignored
|
||||
}
|
||||
});
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user