mirror of
https://github.com/intergalacticalvariable/reader.git
synced 2024-11-16 11:42:32 +08:00
more console logs
This commit is contained in:
parent
f6ee7ca6e5
commit
19dc9df9cb
|
@ -401,17 +401,27 @@ export class CrawlerHost extends RPCHost {
|
||||||
let toBeTurnedToMd = jsDomElementOfHTML;
|
let toBeTurnedToMd = jsDomElementOfHTML;
|
||||||
let turnDownService = this.getTurndown({ url: snapshot.rebase || nominalUrl, imgDataUrlToObjectUrl });
|
let turnDownService = this.getTurndown({ url: snapshot.rebase || nominalUrl, imgDataUrlToObjectUrl });
|
||||||
if (mode !== 'markdown' && snapshot.parsed?.content) {
|
if (mode !== 'markdown' && snapshot.parsed?.content) {
|
||||||
|
console.log('Processing parsed content for non-markdown mode');
|
||||||
const jsDomElementOfParsed = this.jsdomControl.snippetToElement(snapshot.parsed.content, snapshot.href);
|
const jsDomElementOfParsed = this.jsdomControl.snippetToElement(snapshot.parsed.content, snapshot.href);
|
||||||
|
console.log('Created jsDomElementOfParsed');
|
||||||
const par1 = this.jsdomControl.runTurndown(turnDownService, jsDomElementOfHTML);
|
const par1 = this.jsdomControl.runTurndown(turnDownService, jsDomElementOfHTML);
|
||||||
|
console.log('Generated par1 from jsDomElementOfHTML');
|
||||||
const par2 = snapshot.parsed.content ? this.jsdomControl.runTurndown(turnDownService, jsDomElementOfParsed) : '';
|
const par2 = snapshot.parsed.content ? this.jsdomControl.runTurndown(turnDownService, jsDomElementOfParsed) : '';
|
||||||
|
console.log('Generated par2 from jsDomElementOfParsed');
|
||||||
|
|
||||||
// If Readability did its job
|
// If Readability did its job
|
||||||
if (par2.length >= 0.3 * par1.length) {
|
if (par2.length >= 0.3 * par1.length) {
|
||||||
|
console.log('Readability seems to have done its job, adjusting turnDownService');
|
||||||
turnDownService = this.getTurndown({ noRules: true, url: snapshot.rebase || nominalUrl, imgDataUrlToObjectUrl });
|
turnDownService = this.getTurndown({ noRules: true, url: snapshot.rebase || nominalUrl, imgDataUrlToObjectUrl });
|
||||||
if (snapshot.parsed.content) {
|
if (snapshot.parsed.content) {
|
||||||
|
console.log('Using parsed content for toBeTurnedToMd');
|
||||||
toBeTurnedToMd = jsDomElementOfParsed;
|
toBeTurnedToMd = jsDomElementOfParsed;
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
console.log('Readability output not sufficient, using original HTML');
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
console.log('Skipping parsed content processing');
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const plugin of this.turnDownPlugins) {
|
for (const plugin of this.turnDownPlugins) {
|
||||||
|
@ -588,6 +598,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
|
||||||
console.log('Crawl method called with request:', req.url);
|
console.log('Crawl method called with request:', req.url);
|
||||||
// const rpcReflect: RPCReflection = {};
|
// const rpcReflect: RPCReflection = {};
|
||||||
const ctx = { req, res };
|
const ctx = { req, res };
|
||||||
|
console.log(`req.headers: ${JSON.stringify(req.headers)}`);
|
||||||
const crawlerOptionsHeaderOnly = CrawlerOptionsHeaderOnly.from(req.headers);
|
const crawlerOptionsHeaderOnly = CrawlerOptionsHeaderOnly.from(req.headers);
|
||||||
const crawlerOptionsParamsAllowed = CrawlerOptions.from(req.method === 'POST' ? req.body : req.query);
|
const crawlerOptionsParamsAllowed = CrawlerOptions.from(req.method === 'POST' ? req.body : req.query);
|
||||||
const noSlashURL = ctx.req.url.slice(1);
|
const noSlashURL = ctx.req.url.slice(1);
|
||||||
|
|
Loading…
Reference in New Issue
Block a user