mirror of
https://github.com/intergalacticalvariable/reader.git
synced 2024-11-16 11:42:32 +08:00
fix
This commit is contained in:
parent
629ab270be
commit
78c8444096
|
@ -18,7 +18,8 @@
|
|||
"from-preset": "npm run build && npm run emu:reset && npm run emu:start",
|
||||
"start": "npm run shell",
|
||||
"deploy": "firebase deploy --only functions",
|
||||
"logs": "firebase functions:log"
|
||||
"logs": "firebase functions:log",
|
||||
"gcp-build": "npx puppeteer browsers install chrome"
|
||||
},
|
||||
"engines": {
|
||||
"node": "18"
|
||||
|
|
|
@ -36,16 +36,16 @@ export class CrawlerHost extends RPCHost {
|
|||
|
||||
const formatted = {
|
||||
title: (snapshot.parsed?.title || snapshot.title || '').trim(),
|
||||
urlSource: snapshot.href.trim(),
|
||||
markdownContent: contentText.trim(),
|
||||
url: snapshot.href.trim(),
|
||||
content: contentText.trim(),
|
||||
|
||||
toString() {
|
||||
return `Title: ${this.title}
|
||||
|
||||
URL Source: ${this.urlSource}
|
||||
URL Source: ${this.url}
|
||||
|
||||
Markdown Content:
|
||||
${contentText}
|
||||
${this.content}
|
||||
`;
|
||||
}
|
||||
};
|
||||
|
|
|
@ -145,7 +145,7 @@ function giveSnapshot() {
|
|||
|
||||
async *scrap(url: string, noCache: string | boolean = false) {
|
||||
const parsedUrl = new URL(url);
|
||||
parsedUrl.search = '';
|
||||
// parsedUrl.search = '';
|
||||
parsedUrl.hash = '';
|
||||
const normalizedUrl = parsedUrl.toString().toLowerCase();
|
||||
const digest = md5Hasher.hash(normalizedUrl);
|
||||
|
@ -191,7 +191,17 @@ function giveSnapshot() {
|
|||
page.on('snapshot', hdl);
|
||||
|
||||
const gotoPromise = page.goto(url, { waitUntil: ['load', 'domcontentloaded', 'networkidle0'], timeout: 30_000 })
|
||||
.then(async (r) => {
|
||||
.catch((err) => {
|
||||
this.logger.warn(`Browsing of ${url} did not fully succeed`, { err: marshalErrorLike(err) });
|
||||
return Promise.reject(new AssertionFailureError({
|
||||
message: `Failed to goto ${url}: ${err}`,
|
||||
cause: err,
|
||||
}));
|
||||
}).finally(async () => {
|
||||
finalized = true;
|
||||
if (!snapshot?.html) {
|
||||
return;
|
||||
}
|
||||
screenshot = await page.screenshot({
|
||||
type: 'jpeg',
|
||||
quality: 85,
|
||||
|
@ -210,16 +220,6 @@ function giveSnapshot() {
|
|||
).catch((err) => {
|
||||
this.logger.warn(`Failed to save snapshot`, { err: marshalErrorLike(err) });
|
||||
});
|
||||
|
||||
return r;
|
||||
}).catch((err) => {
|
||||
this.logger.warn(`Failed to goto ${url}`, { err: marshalErrorLike(err) });
|
||||
return Promise.reject(new AssertionFailureError({
|
||||
message: `Failed to goto ${url}: ${err}`,
|
||||
cause: err,
|
||||
}));
|
||||
}).finally(() => {
|
||||
finalized = true;
|
||||
});
|
||||
|
||||
try {
|
||||
|
|
Loading…
Reference in New Issue
Block a user