mirror of
https://github.com/intergalacticalvariable/reader.git
synced 2024-11-16 11:42:32 +08:00
fix: expose publishedTime if possible
This commit is contained in:
parent
6e36f0a447
commit
a211366501
|
@ -123,12 +123,18 @@ export class CrawlerHost extends RPCHost {
|
|||
title: (snapshot.parsed?.title || snapshot.title || '').trim(),
|
||||
url: nominalUrl || snapshot.href?.trim(),
|
||||
content: cleanText,
|
||||
publishedTime: snapshot.parsed?.publishedTime || undefined,
|
||||
|
||||
toString() {
|
||||
const mixins = [];
|
||||
if (this.publishedTime) {
|
||||
mixins.push(`Published Time: ${this.publishedTime}`);
|
||||
}
|
||||
|
||||
return `Title: ${this.title}
|
||||
|
||||
URL Source: ${this.url}
|
||||
|
||||
${mixins.length ? `\n${mixins.join('\n\n')}\n` : ''}
|
||||
Markdown Content:
|
||||
${this.content}
|
||||
`;
|
||||
|
|
|
@ -20,23 +20,25 @@ export interface ImgBrief {
|
|||
alt?: string;
|
||||
}
|
||||
|
||||
export interface ReadabilityParsed {
|
||||
title: string;
|
||||
content: string;
|
||||
textContent: string;
|
||||
length: number;
|
||||
excerpt: string;
|
||||
byline: string;
|
||||
dir: string;
|
||||
siteName: string;
|
||||
lang: string;
|
||||
publishedTime: string;
|
||||
}
|
||||
|
||||
export interface PageSnapshot {
|
||||
title: string;
|
||||
href: string;
|
||||
html: string;
|
||||
text: string;
|
||||
parsed?: {
|
||||
title: string;
|
||||
content: string;
|
||||
textContent: string;
|
||||
length: number;
|
||||
excerpt: string;
|
||||
byline: string;
|
||||
dir: string;
|
||||
siteName: string;
|
||||
lang: string;
|
||||
publishedTime: string;
|
||||
} | null;
|
||||
parsed?: Partial<ReadabilityParsed> | null;
|
||||
screenshot?: Buffer;
|
||||
imgs?: ImgBrief[];
|
||||
}
|
||||
|
@ -121,7 +123,7 @@ export class PuppeteerControl extends AsyncService {
|
|||
// preparations.push(page.setUserAgent(`Slackbot-LinkExpanding 1.0 (+https://api.slack.com/robots)`));
|
||||
// preparations.push(page.setUserAgent(`Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.0; +https://openai.com/gptbot)`));
|
||||
preparations.push(page.setBypassCSP(true));
|
||||
preparations.push(page.setViewport({ width: 1920, height: 1080 }));
|
||||
preparations.push(page.setViewport({ width: 1024, height: 1024 }));
|
||||
preparations.push(page.exposeFunction('reportSnapshot', (snapshot: any) => {
|
||||
page.emit('snapshot', snapshot);
|
||||
}));
|
||||
|
@ -262,7 +264,7 @@ function giveSnapshot() {
|
|||
}
|
||||
screenshot = await page.screenshot({
|
||||
type: 'jpeg',
|
||||
quality: 85,
|
||||
quality: 75,
|
||||
});
|
||||
snapshot = await page.evaluate('giveSnapshot()') as PageSnapshot;
|
||||
if (!snapshot.title || !snapshot.parsed?.content) {
|
||||
|
@ -270,7 +272,7 @@ function giveSnapshot() {
|
|||
if (salvaged) {
|
||||
screenshot = await page.screenshot({
|
||||
type: 'jpeg',
|
||||
quality: 85,
|
||||
quality: 75,
|
||||
});
|
||||
snapshot = await page.evaluate('giveSnapshot()') as PageSnapshot;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user