mirror of
https://github.com/intergalacticalvariable/reader.git
synced 2024-11-16 11:42:32 +08:00
fix: give expireAt for image cache
This commit is contained in:
parent
4f284f51b6
commit
8a2b095bd7
|
@ -53,8 +53,6 @@ export class CrawlerHost extends RPCHost {
|
|||
|
||||
turnDownPlugins = [require('turndown-plugin-gfm').gfm];
|
||||
|
||||
imageShortUrlPrefix?: string;
|
||||
|
||||
constructor(
|
||||
protected globalLogger: Logger,
|
||||
protected puppeteerControl: PuppeteerControl,
|
||||
|
@ -78,13 +76,13 @@ export class CrawlerHost extends RPCHost {
|
|||
|
||||
let contentText = '';
|
||||
if (toBeTurnedToMd) {
|
||||
const urlToAltMap: { [k: string]: { shortDigest: string, alt?: string; }; } = {};
|
||||
const urlToAltMap: { [k: string]: string | undefined; } = {};
|
||||
const tasks = (snapshot.imgs || []).map(async (x) => {
|
||||
const r = await this.altTextService.getAltTextAndShortDigest(x).catch((err)=> {
|
||||
const r = await this.altTextService.getAltText(x).catch((err: any) => {
|
||||
this.logger.warn(`Failed to get alt text for ${x.src}`, { err: marshalErrorLike(err) });
|
||||
return undefined;
|
||||
});
|
||||
if (r) {
|
||||
if (r && x.src) {
|
||||
urlToAltMap[x.src.trim()] = r;
|
||||
}
|
||||
});
|
||||
|
@ -103,7 +101,7 @@ export class CrawlerHost extends RPCHost {
|
|||
const mapped = urlToAltMap[src];
|
||||
imgIdx++;
|
||||
if (mapped) {
|
||||
return `![Image ${imgIdx}: ${mapped.alt || alt}](${this.imageShortUrlPrefix ? `${this.imageShortUrlPrefix}/${mapped.shortDigest}` : src})`;
|
||||
return `![Image ${imgIdx}: ${mapped || alt}](${src})`;
|
||||
}
|
||||
return `![Image ${imgIdx}: ${alt}](${src})`;
|
||||
}
|
||||
|
@ -115,7 +113,7 @@ export class CrawlerHost extends RPCHost {
|
|||
if (!contentText || (contentText.startsWith('<') && contentText.endsWith('>'))) {
|
||||
contentText = turnDownService.turndown(snapshot.html);
|
||||
}
|
||||
if (!contentText || (contentText.startsWith('<') && contentText.endsWith('>'))) {
|
||||
if (!contentText || (contentText.startsWith('<') || contentText.endsWith('>'))) {
|
||||
contentText = snapshot.text;
|
||||
}
|
||||
|
||||
|
|
|
@ -44,32 +44,33 @@ export class AltTextService extends AsyncService {
|
|||
}
|
||||
}
|
||||
|
||||
async getAltTextAndShortDigest(imgBrief: ImgBrief) {
|
||||
async getAltText(imgBrief: ImgBrief) {
|
||||
if (!imgBrief.src) {
|
||||
return undefined;
|
||||
}
|
||||
if (imgBrief.alt) {
|
||||
return imgBrief.alt;
|
||||
}
|
||||
const digest = md5Hasher.hash(imgBrief.src);
|
||||
const shortDigest = Buffer.from(digest, 'hex').toString('base64url');
|
||||
|
||||
const existing = await ImgAlt.fromFirestore(shortDigest);
|
||||
|
||||
if (existing?.generatedAlt) {
|
||||
return {
|
||||
shortDigest,
|
||||
alt: existing.generatedAlt,
|
||||
};
|
||||
if (existing) {
|
||||
return existing.generatedAlt || existing.originalAlt || '';
|
||||
}
|
||||
|
||||
let generatedCaption;
|
||||
let generatedCaption = '';
|
||||
|
||||
if (!imgBrief.alt) {
|
||||
try {
|
||||
generatedCaption = await this.caption(imgBrief.src);
|
||||
} catch (err) {
|
||||
this.logger.warn(`Unable to generate alt text for ${imgBrief.src}`, { err });
|
||||
}
|
||||
try {
|
||||
generatedCaption = await this.caption(imgBrief.src);
|
||||
} catch (err) {
|
||||
this.logger.warn(`Unable to generate alt text for ${imgBrief.src}`, { err });
|
||||
}
|
||||
|
||||
// Don't try again until the next day
|
||||
const expireMixin = generatedCaption ? {} : { expireAt: new Date(Date.now() + 1000 * 3600 * 24) };
|
||||
|
||||
await ImgAlt.COLLECTION.doc(shortDigest).set(
|
||||
{
|
||||
_id: shortDigest,
|
||||
|
@ -79,13 +80,11 @@ export class AltTextService extends AsyncService {
|
|||
urlDigest: digest,
|
||||
originalAlt: imgBrief.alt || '',
|
||||
generatedAlt: generatedCaption || '',
|
||||
createdAt: new Date()
|
||||
createdAt: new Date(),
|
||||
...expireMixin
|
||||
}, { merge: true }
|
||||
);
|
||||
|
||||
return {
|
||||
shortDigest,
|
||||
alt: generatedCaption,
|
||||
};
|
||||
return generatedCaption;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user