fix: give expireAt for image cache

This commit is contained in:
yanlong.wang 2024-04-16 15:20:46 +08:00
parent 4f284f51b6
commit 8a2b095bd7
No known key found for this signature in database
GPG Key ID: C0A623C0BADF9F37
2 changed files with 22 additions and 25 deletions

View File

@@ -53,8 +53,6 @@ export class CrawlerHost extends RPCHost {
turnDownPlugins = [require('turndown-plugin-gfm').gfm];
imageShortUrlPrefix?: string;
constructor(
protected globalLogger: Logger,
protected puppeteerControl: PuppeteerControl,
@@ -78,13 +76,13 @@ export class CrawlerHost extends RPCHost {
let contentText = '';
if (toBeTurnedToMd) {
const urlToAltMap: { [k: string]: { shortDigest: string, alt?: string; }; } = {};
const urlToAltMap: { [k: string]: string | undefined; } = {};
const tasks = (snapshot.imgs || []).map(async (x) => {
const r = await this.altTextService.getAltTextAndShortDigest(x).catch((err)=> {
const r = await this.altTextService.getAltText(x).catch((err: any) => {
this.logger.warn(`Failed to get alt text for ${x.src}`, { err: marshalErrorLike(err) });
return undefined;
});
if (r) {
if (r && x.src) {
urlToAltMap[x.src.trim()] = r;
}
});
@@ -103,7 +101,7 @@ export class CrawlerHost extends RPCHost {
const mapped = urlToAltMap[src];
imgIdx++;
if (mapped) {
return `![Image ${imgIdx}: ${mapped.alt || alt}](${this.imageShortUrlPrefix ? `${this.imageShortUrlPrefix}/${mapped.shortDigest}` : src})`;
return `![Image ${imgIdx}: ${mapped || alt}](${src})`;
}
return `![Image ${imgIdx}: ${alt}](${src})`;
}
@@ -115,7 +113,7 @@ export class CrawlerHost extends RPCHost {
if (!contentText || (contentText.startsWith('<') && contentText.endsWith('>'))) {
contentText = turnDownService.turndown(snapshot.html);
}
if (!contentText || (contentText.startsWith('<') && contentText.endsWith('>'))) {
if (!contentText || (contentText.startsWith('<') || contentText.endsWith('>'))) {
contentText = snapshot.text;
}

View File

@@ -44,32 +44,33 @@ export class AltTextService extends AsyncService {
}
}
async getAltTextAndShortDigest(imgBrief: ImgBrief) {
async getAltText(imgBrief: ImgBrief) {
if (!imgBrief.src) {
return undefined;
}
if (imgBrief.alt) {
return imgBrief.alt;
}
const digest = md5Hasher.hash(imgBrief.src);
const shortDigest = Buffer.from(digest, 'hex').toString('base64url');
const existing = await ImgAlt.fromFirestore(shortDigest);
if (existing?.generatedAlt) {
return {
shortDigest,
alt: existing.generatedAlt,
};
if (existing) {
return existing.generatedAlt || existing.originalAlt || '';
}
let generatedCaption;
let generatedCaption = '';
if (!imgBrief.alt) {
try {
generatedCaption = await this.caption(imgBrief.src);
} catch (err) {
this.logger.warn(`Unable to generate alt text for ${imgBrief.src}`, { err });
}
try {
generatedCaption = await this.caption(imgBrief.src);
} catch (err) {
this.logger.warn(`Unable to generate alt text for ${imgBrief.src}`, { err });
}
// Don't try again until the next day
const expireMixin = generatedCaption ? {} : { expireAt: new Date(Date.now() + 1000 * 3600 * 24) };
await ImgAlt.COLLECTION.doc(shortDigest).set(
{
_id: shortDigest,
@@ -79,13 +80,11 @@ export class AltTextService extends AsyncService {
urlDigest: digest,
originalAlt: imgBrief.alt || '',
generatedAlt: generatedCaption || '',
createdAt: new Date()
createdAt: new Date(),
...expireMixin
}, { merge: true }
);
return {
shortDigest,
alt: generatedCaption,
};
return generatedCaption;
}
}