mirror of
https://github.com/intergalacticalvariable/reader.git
synced 2024-11-16 11:42:32 +08:00
fix: detect when readability does not work
This commit is contained in:
parent
eaa06781e3
commit
579f259cb9
|
@ -327,8 +327,19 @@ export class CrawlerHost extends RPCHost {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
const toBeTurnedToMd = mode === 'markdown' ? snapshot.html : snapshot.parsed?.content;
|
let toBeTurnedToMd = snapshot.html;
|
||||||
let turnDownService = mode === 'markdown' ? this.getTurndown({ url: snapshot.href }) : this.getTurndown({ noRules: true, url: snapshot.href });
|
let turnDownService = this.getTurndown({ url: nominalUrl });
|
||||||
|
if (mode !== 'markdown' && snapshot.parsed?.content) {
|
||||||
|
const par1 = turnDownService.turndown(toBeTurnedToMd);
|
||||||
|
const par2 = turnDownService.turndown(snapshot.parsed.content)
|
||||||
|
|
||||||
|
// If Readability did its job
|
||||||
|
if (par2.length >= 0.3 * par1.length) {
|
||||||
|
turnDownService = this.getTurndown({ noRules: true, url: snapshot.href });
|
||||||
|
toBeTurnedToMd = snapshot.parsed.content;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for (const plugin of this.turnDownPlugins) {
|
for (const plugin of this.turnDownPlugins) {
|
||||||
turnDownService = turnDownService.use(plugin);
|
turnDownService = turnDownService.use(plugin);
|
||||||
}
|
}
|
||||||
|
@ -585,7 +596,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
|
||||||
let urlToCrawl;
|
let urlToCrawl;
|
||||||
const normalizeUrl = (await pNormalizeUrl).default;
|
const normalizeUrl = (await pNormalizeUrl).default;
|
||||||
try {
|
try {
|
||||||
urlToCrawl = new URL(normalizeUrl(noSlashURL.trim(), { stripWWW: false, removeTrailingSlash: false, removeSingleSlash: false, sortQueryParameters:false }));
|
urlToCrawl = new URL(normalizeUrl(noSlashURL.trim(), { stripWWW: false, removeTrailingSlash: false, removeSingleSlash: false, sortQueryParameters: false }));
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
throw new ParamValidationError({
|
throw new ParamValidationError({
|
||||||
message: `${err}`,
|
message: `${err}`,
|
||||||
|
|
Loading…
Reference in New Issue
Block a user