From b46e859a30f351dd171fc65e93fdf2e00e8a5805 Mon Sep 17 00:00:00 2001 From: "yanlong.wang" Date: Wed, 10 Apr 2024 19:43:53 +0800 Subject: [PATCH] wip --- backend/functions/src/cloud-functions/crawler.ts | 1 - backend/functions/src/services/puppeteer.ts | 13 +++++-------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/backend/functions/src/cloud-functions/crawler.ts b/backend/functions/src/cloud-functions/crawler.ts index 4d366ed..1e899a1 100644 --- a/backend/functions/src/cloud-functions/crawler.ts +++ b/backend/functions/src/cloud-functions/crawler.ts @@ -41,7 +41,6 @@ export class CrawlerHost extends RPCHost { try { for await (const scrapped of this.puppeteerControl.scrap(url)) { - this.logger.info(`Scrapped: ${scrapped.snapshot}`); const content = typeof scrapped.snapshot === 'string' ? scrapped.snapshot : (scrapped.snapshot as any)?.content; if (!content) { continue; diff --git a/backend/functions/src/services/puppeteer.ts b/backend/functions/src/services/puppeteer.ts index 33514ee..499ce52 100644 --- a/backend/functions/src/services/puppeteer.ts +++ b/backend/functions/src/services/puppeteer.ts @@ -42,7 +42,7 @@ export class PuppeteerControl extends AsyncService { await this.browser.close(); } this.browser = await puppeteer.launch({ - headless: false, + headless: true, args: ['--no-sandbox', '--disable-setuid-sandbox'], }); this.browser.once('disconnected', () => { @@ -67,8 +67,7 @@ export class PuppeteerControl extends AsyncService { await page.evaluateOnNewDocument(READABILITY_JS); await page.evaluateOnNewDocument(() => { - // @ts-expect-error - window.giveSnapshot() = () => { + function giveSnapshot() { // @ts-expect-error return new Readability(document.cloneNode(true)).parse(); }; @@ -79,9 +78,7 @@ export class PuppeteerControl extends AsyncService { return; } - // @ts-expect-error - const parsed = window.giveSnapshot(); - console.log(parsed); + const parsed = giveSnapshot(); if (parsed) { // @ts-expect-error window.reportSnapshot(parsed); @@ -91,7 +88,7 @@ export class PuppeteerControl extends AsyncService { } aftershot = setTimeout(() => { // @ts-expect-error - window.reportSnapshot(window.giveSnapshot()); + window.reportSnapshot(giveSnapshot()); }, 500); } }; @@ -130,7 +127,7 @@ export class PuppeteerControl extends AsyncService { const screenshot = await page.screenshot(); if (finalized) { await gotoPromise; - snapshot = await page.evaluate('window.giveSnapshot()'); + snapshot = await page.evaluate('new Readability(document.cloneNode(true)).parse()'); yield { snapshot, screenshot }; break; }