Update metadata.ts

This commit is contained in:
Nicolas 2024-10-15 21:36:13 -03:00
parent f49552e413
commit 795e5a9228

View File

@ -70,11 +70,12 @@ export function extractMetadata(soup: CheerioAPI, url: string): Metadata {
let pageStatusCode: number | null = null;
let pageError: string | null = null;
const customMetadata: Record<string, string | string[]> = {};
try {
title = soup("title").text() || null;
description = soup('meta[name="description"]').attr("content") || null;
// Read the page language from the lang attribute of the <html> element
language = soup('html').attr('lang') || null;
keywords = soup('meta[name="keywords"]').attr("content") || null;
@ -104,6 +105,22 @@ export function extractMetadata(soup: CheerioAPI, url: string): Metadata {
dcDateCreated = soup('meta[name="dc.date.created"]').attr("content") || null;
dctermsCreated = soup('meta[name="dcterms.created"]').attr("content") || null;
// Extract all meta tags for custom metadata.
// Every <meta> element that has a name (or, failing that, a property)
// attribute and a non-empty content attribute is recorded in customMetadata.
// A key seen once maps to its content string; a key seen more than once maps
// to an array of the contents in the order the tags were visited.
soup("meta").each((_index, elem) => {
  const meta = soup(elem);
  const name = meta.attr("name") || meta.attr("property");
  const content = meta.attr("content");
  if (!name || !content) {
    return;
  }

  // Only own properties count as "already seen". A plain `=== undefined`
  // lookup would also find members inherited from Object.prototype, so a tag
  // named e.g. "constructor" or "toString" would wrap an inherited function
  // into the array of contents instead of storing the string.
  // Note: a tag named "__proto__" never becomes an own property through plain
  // assignment (the string is ignored by the prototype setter), so it is
  // effectively skipped rather than corrupting the object's prototype.
  if (!Object.prototype.hasOwnProperty.call(customMetadata, name)) {
    customMetadata[name] = content;
    return;
  }

  const existing = customMetadata[name];
  if (Array.isArray(existing)) {
    existing.push(content);
  } else if (typeof existing === "string") {
    // Second occurrence of this key: promote the single string to an array.
    customMetadata[name] = [existing, content];
  }
});
} catch (error) {
Logger.error(`Error extracting metadata: ${error}`);
}
@ -141,5 +158,6 @@ export function extractMetadata(soup: CheerioAPI, url: string): Metadata {
...(sourceURL ? { sourceURL } : {}),
...(pageStatusCode ? { pageStatusCode } : {}),
...(pageError ? { pageError } : {}),
...customMetadata,
};
}