diff --git a/backend/functions/package-lock.json b/backend/functions/package-lock.json index 38f0027..4dda188 100644 --- a/backend/functions/package-lock.json +++ b/backend/functions/package-lock.json @@ -14,19 +14,19 @@ "archiver": "^6.0.1", "axios": "^1.3.3", "bcrypt": "^5.1.0", - "civkit": "^0.6.5-7a4ba56", + "civkit": "^0.6.5-047c0d8", "cors": "^2.8.5", "dayjs": "^1.11.9", "express": "^4.19.2", "firebase-admin": "^12.1.0", - "firebase-functions": "^4.8.0", + "firebase-functions": "^4.9.0", "generic-pool": "^3.9.0", "htmlparser2": "^9.0.0", "jose": "^5.1.0", "langdetect": "^0.2.1", "minio": "^7.1.3", "openai": "^4.20.0", - "puppeteer": "^22.6.3", + "puppeteer": "^22.7.1", "puppeteer-extra": "^3.3.6", "puppeteer-extra-plugin-block-resources": "^2.4.3", "puppeteer-extra-plugin-page-proxy": "^2.0.0", @@ -1963,9 +1963,9 @@ "integrity": "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==" }, "node_modules/@puppeteer/browsers": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/@puppeteer/browsers/-/browsers-2.2.1.tgz", - "integrity": "sha512-QSXujx4d4ogDamQA8ckkkRieFzDgZEuZuGiey9G7CuDcbnX4iINKWxTPC5Br2AEzY9ICAvcndqgAUFMMKnS/Tw==", + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/@puppeteer/browsers/-/browsers-2.2.3.tgz", + "integrity": "sha512-bJ0UBsk0ESOs6RFcLXOt99a3yTDcOKlzfjad+rhFwdaG1Lu/Wzq58GHYCDTlZ9z6mldf4g+NTb+TXEfe0PpnsQ==", "dependencies": { "debug": "4.3.4", "extract-zip": "2.0.1", @@ -3645,9 +3645,9 @@ } }, "node_modules/chromium-bidi": { - "version": "0.5.17", - "resolved": "https://registry.npmjs.org/chromium-bidi/-/chromium-bidi-0.5.17.tgz", - "integrity": "sha512-BqOuIWUgTPj8ayuBFJUYCCuwIcwjBsb3/614P7tt1bEPJ4i1M0kCdIl0Wi9xhtswBXnfO2bTpTMkHD71H8rJMg==", + "version": "0.5.19", + "resolved": "https://registry.npmjs.org/chromium-bidi/-/chromium-bidi-0.5.19.tgz", + "integrity": "sha512-UA6zL77b7RYCjJkZBsZ0wlvCTD+jTjllZ8f6wdO4buevXgTZYjV+XLB9CiEa2OuuTGGTLnI7eN9I60YxuALGQg==", "dependencies": { "mitt": "3.0.1", "urlpattern-polyfill": "10.0.0", @@ -3674,9 +3674,9 @@ } }, "node_modules/civkit": { - "version": "0.6.5-7a4ba56", - "resolved": "https://registry.npmjs.org/civkit/-/civkit-0.6.5-7a4ba56.tgz", - "integrity": "sha512-WAKnZn7DwuHkjEaH/bGXN4ZSYFvzM06ky1S9LjzHd1Ud+fMd3sEJR0b68BprzqXdeBNB5LyPHO4Gikf1z7J1bA==", + "version": "0.6.5-047c0d8", + "resolved": "https://registry.npmjs.org/civkit/-/civkit-0.6.5-047c0d8.tgz", + "integrity": "sha512-4FWHrkJQHbTD3wjNeihxOzm7GSgQa9BUgSvPOLsfKybeEw9Pv+I94uDUP8PczL1TpHO6hIbIE2KJjzSOx6PYqg==", "dependencies": { "lodash": "^4.17.21", "tslib": "^2.5.0" @@ -4284,9 +4284,9 @@ } }, "node_modules/devtools-protocol": { - "version": "0.0.1262051", - "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1262051.tgz", - "integrity": "sha512-YJe4CT5SA8on3Spa+UDtNhEqtuV6Epwz3OZ4HQVLhlRccpZ9/PAYk0/cy/oKxFKRrZPBUPyxympQci4yWNWZ9g==" + "version": "0.0.1273771", + "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1273771.tgz", + "integrity": "sha512-QDbb27xcTVReQQW/GHJsdQqGKwYBE7re7gxehj467kKP2DKuYBUj6i2k5LRiAC66J1yZG/9gsxooz/s9pcm0Og==" }, "node_modules/diff-sequences": { "version": "29.6.3", @@ -9464,15 +9464,15 @@ } }, "node_modules/puppeteer": { - "version": "22.6.4", - "resolved": "https://registry.npmjs.org/puppeteer/-/puppeteer-22.6.4.tgz", - "integrity": "sha512-J9hXNwZmuqKDmNMj6kednZH8jzbdX9735NQfQJrq5LRD4nHisAMyW9pCD7glKi+iM7RV9JkesI1MYhdsN+0ZSQ==", + "version": "22.7.1", + "resolved": "https://registry.npmjs.org/puppeteer/-/puppeteer-22.7.1.tgz", + "integrity": "sha512-JBCBCwQ9+dyPp5haqeecgv0N0vgWFx44woUeKJaPeJT8CU3RXrd8F/tqJQbuAmcWlbMhYJSlTJkIFrwVAs6BNA==", "hasInstallScript": true, "dependencies": { - "@puppeteer/browsers": "2.2.1", + "@puppeteer/browsers": "2.2.3", "cosmiconfig": "9.0.0", - "devtools-protocol": "0.0.1262051", - "puppeteer-core": "22.6.4" + "devtools-protocol": "0.0.1273771", + "puppeteer-core": "22.7.1" }, "bin": { "puppeteer": "lib/esm/puppeteer/node/cli.js" @@ -9482,14 +9482,14 @@ } }, "node_modules/puppeteer-core": { - "version": "22.6.4", - "resolved": "https://registry.npmjs.org/puppeteer-core/-/puppeteer-core-22.6.4.tgz", - "integrity": "sha512-QtfJwPmqQec3EHc6LqbEz03vSiuVAr9bYp0TV87dLoreev6ZevsXdLgOfQgoA3GocrsSe/eUf7NRPQ1lQfsc3w==", + "version": "22.7.1", + "resolved": "https://registry.npmjs.org/puppeteer-core/-/puppeteer-core-22.7.1.tgz", + "integrity": "sha512-jD7T7yN7PWGuJmNT0TAEboA26s0VVnvbgCxqgQIF+eNQW2u71ENaV2JwzSJiCHO+e72H4Ue6AgKD9USQ8xAcOQ==", "dependencies": { - "@puppeteer/browsers": "2.2.1", - "chromium-bidi": "0.5.17", + "@puppeteer/browsers": "2.2.3", + "chromium-bidi": "0.5.19", "debug": "4.3.4", - "devtools-protocol": "0.0.1262051", + "devtools-protocol": "0.0.1273771", "ws": "8.16.0" }, "engines": { diff --git a/backend/functions/package.json b/backend/functions/package.json index acc0546..979b34f 100644 --- a/backend/functions/package.json +++ b/backend/functions/package.json @@ -34,19 +34,19 @@ "archiver": "^6.0.1", "axios": "^1.3.3", "bcrypt": "^5.1.0", - "civkit": "^0.6.5-7a4ba56", + "civkit": "^0.6.5-047c0d8", "cors": "^2.8.5", "dayjs": "^1.11.9", "express": "^4.19.2", "firebase-admin": "^12.1.0", - "firebase-functions": "^4.8.0", + "firebase-functions": "^4.9.0", "generic-pool": "^3.9.0", "htmlparser2": "^9.0.0", "jose": "^5.1.0", "langdetect": "^0.2.1", "minio": "^7.1.3", "openai": "^4.20.0", - "puppeteer": "^22.6.3", + "puppeteer": "^22.7.1", "puppeteer-extra": "^3.3.6", "puppeteer-extra-plugin-block-resources": "^2.4.3", "puppeteer-extra-plugin-page-proxy": "^2.0.0", diff --git a/backend/functions/src/index.ts b/backend/functions/src/index.ts index cc42b7e..a6016bb 100644 --- a/backend/functions/src/index.ts +++ b/backend/functions/src/index.ts @@ -1,11 +1,11 @@ import 'reflect-metadata'; +import './shared/lib/doom-domain'; import { initializeApp } from 'firebase-admin/app'; initializeApp(); import { loadModulesDynamically, registry } from './shared'; import path from 'path'; -import { ApplicationError } from 'civkit'; loadModulesDynamically(path.resolve(__dirname, 'cloud-functions')); Object.assign(exports, registry.exportAll()); @@ -16,24 +16,14 @@ Object.assign(exports, registry.exportGrouped({ registry.title = 'reader'; registry.version = '0.1.0'; -process.on('unhandledRejection', (err) => { - // Walk around Firebase runtime bug. - if (err instanceof ApplicationError) { - // Application error shall not crash the process; - return; - } - - // Looks like Firebase runtime does not handle error properly. - // Make sure to quit the process. - process.nextTick(() => process.exit(1)); - - throw err; -}); +process.on('unhandledRejection', (_err) => `Somehow is false alarm in firebase`); process.on('uncaughtException', (err) => { + console.log('Uncaught exception', err); + // Looks like Firebase runtime does not handle error properly. // Make sure to quit the process. process.nextTick(() => process.exit(1)); - + console.error('Uncaught exception, process quit.'); throw err; }); diff --git a/backend/functions/src/services/puppeteer.ts b/backend/functions/src/services/puppeteer.ts index f35315c..1966a1a 100644 --- a/backend/functions/src/services/puppeteer.ts +++ b/backend/functions/src/services/puppeteer.ts @@ -10,6 +10,7 @@ import puppeteer from 'puppeteer-extra'; import puppeteerBlockResources from 'puppeteer-extra-plugin-block-resources'; import puppeteerPageProxy from 'puppeteer-extra-plugin-page-proxy'; +import { ServiceCrashedError } from '../shared/lib/errors'; const READABILITY_JS = fs.readFileSync(require.resolve('@mozilla/readability/Readability.js'), 'utf-8'); @@ -85,7 +86,6 @@ export class PuppeteerControl extends AsyncService { await Promise.race([ (async () => { const ctx = page.browserContext(); - await page.removeExposedFunction('reportSnapshot'); await page.close(); await ctx.close(); })(), delay(5000) @@ -110,6 +110,7 @@ export class PuppeteerControl extends AsyncService { constructor(protected globalLogger: Logger) { super(...arguments); + this.setMaxListeners(2 * this.pagePool.max + 1); } override async init() { @@ -141,12 +142,13 @@ export class PuppeteerControl extends AsyncService { this.browser.once('disconnected', () => { this.logger.warn(`Browser disconnected`); this.emit('crippled'); + process.nextTick(()=> this.serviceReady()); }); this.logger.info(`Browser launched: ${this.browser.process()?.pid}`); this.emit('ready'); - this.__healthCheckInterval = setInterval(() => this.healthCheck(), 30_000); + // this.__healthCheckInterval = setInterval(() => this.healthCheck(), 30_000); } @maxConcurrency(1) @@ -235,6 +237,8 @@ function giveSnapshot() { `)); await Promise.all(preparations); + await page.goto('about:blank', { waitUntil: 'domcontentloaded' }); + await page.evaluateOnNewDocument(` let aftershot = undefined; const handlePageLoad = () => { @@ -262,8 +266,6 @@ document.addEventListener('readystatechange', handlePageLoad); document.addEventListener('load', handlePageLoad); `); - // TODO: further setup the page; - return page; } @@ -272,7 +274,6 @@ document.addEventListener('load', handlePageLoad); const url = parsedUrl.toString(); this.logger.info(`Scraping ${url}`, { url }); - let snapshot: PageSnapshot | undefined; let screenshot: Buffer | undefined; @@ -285,6 +286,11 @@ document.addEventListener('load', handlePageLoad); } let nextSnapshotDeferred = Defer(); + const crippleListener = () => nextSnapshotDeferred.reject(new ServiceCrashedError({ message: `Browser crashed, try again` })); + this.once('crippled', crippleListener); + nextSnapshotDeferred.promise.finally(() => { + this.off('crippled', crippleListener); + }); let finalized = false; const hdl = (s: any) => { if (snapshot === s) { @@ -293,6 +299,10 @@ document.addEventListener('load', handlePageLoad); snapshot = s; nextSnapshotDeferred.resolve(s); nextSnapshotDeferred = Defer(); + this.once('crippled', crippleListener); + nextSnapshotDeferred.promise.finally(() => { + this.off('crippled', crippleListener); + }); }; page.on('snapshot', hdl); diff --git a/thinapps-shared b/thinapps-shared index e2a1d58..a6a3ad4 160000 --- a/thinapps-shared +++ b/thinapps-shared @@ -1 +1 @@ -Subproject commit e2a1d586063f8e8d663c013fa2febe9f621f9f8e +Subproject commit a6a3ad42efc34da243afa25d602b405b92f13379