mirror of
https://github.com/intergalacticalvariable/reader.git
synced 2024-11-16 11:42:32 +08:00
fix: Implement retry mechanism and improve error handling for scraping function
This commit is contained in:
parent
ddbf0030b4
commit
a3a299fb38
|
@ -35,10 +35,13 @@ export const crawler = https.onRequest(async (req, res) => {
|
||||||
// registry.title = 'reader';
|
// registry.title = 'reader';
|
||||||
// registry.version = '0.1.0';
|
// registry.version = '0.1.0';
|
||||||
|
|
||||||
process.on('unhandledRejection', (_err) => `Somehow is false alarm in firebase`);
|
process.on('unhandledRejection', (reason, promise) => {
|
||||||
|
console.error('Unhandled Rejection at:', promise, 'reason:', reason);
|
||||||
|
// Application specific logging, throwing an error, or other logic here
|
||||||
|
});
|
||||||
|
|
||||||
process.on('uncaughtException', (err) => {
|
process.on('uncaughtException', (err) => {
|
||||||
console.log('Uncaught exception', err);
|
console.error('Uncaught Exception:', err);
|
||||||
|
|
||||||
// Looks like Firebase runtime does not handle error properly.
|
// Looks like Firebase runtime does not handle error properly.
|
||||||
// Make sure to quit the process.
|
// Make sure to quit the process.
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
import os from 'os';
|
import os from 'os';
|
||||||
import fs from 'fs';
|
import fs from 'fs';
|
||||||
import { container, singleton } from 'tsyringe';
|
import { container, singleton } from 'tsyringe';
|
||||||
import { AsyncService, Defer, marshalErrorLike, AssertionFailureError, delay, maxConcurrency } from 'civkit';
|
import { AsyncService, Defer, marshalErrorLike, AssertionFailureError, delay, maxConcurrency, retry } from 'civkit';
|
||||||
import { Logger } from '../shared/index';
|
import { Logger } from '../shared/index';
|
||||||
|
|
||||||
import type { Browser, CookieParam, Page } from 'puppeteer';
|
import type { Browser, CookieParam, Page } from 'puppeteer';
|
||||||
|
@ -461,6 +461,7 @@ document.addEventListener('load', handlePageLoad);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@retry({ times: 3, delay: 1000 })
|
||||||
async *scrap(parsedUrl: URL, options?: ScrappingOptions): AsyncGenerator<PageSnapshot | undefined> {
|
async *scrap(parsedUrl: URL, options?: ScrappingOptions): AsyncGenerator<PageSnapshot | undefined> {
|
||||||
// parsedUrl.search = '';
|
// parsedUrl.search = '';
|
||||||
const url = parsedUrl.toString();
|
const url = parsedUrl.toString();
|
||||||
|
@ -468,8 +469,12 @@ document.addEventListener('load', handlePageLoad);
|
||||||
let snapshot: PageSnapshot | undefined;
|
let snapshot: PageSnapshot | undefined;
|
||||||
let screenshot: Buffer | undefined;
|
let screenshot: Buffer | undefined;
|
||||||
let pageshot: Buffer | undefined;
|
let pageshot: Buffer | undefined;
|
||||||
const page = await this.getNextPage();
|
let page: Page | null = null;
|
||||||
const sn = this.snMap.get(page);
|
let sn: number | undefined;
|
||||||
|
|
||||||
|
try {
|
||||||
|
page = await this.getNextPage();
|
||||||
|
sn = this.snMap.get(page);
|
||||||
this.logger.info(`Page ${sn}: Scraping ${url}`, { url });
|
this.logger.info(`Page ${sn}: Scraping ${url}`, { url });
|
||||||
if (options?.proxyUrl) {
|
if (options?.proxyUrl) {
|
||||||
await page.useProxy(options.proxyUrl);
|
await page.useProxy(options.proxyUrl);
|
||||||
|
@ -653,11 +658,16 @@ document.addEventListener('load', handlePageLoad);
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} catch (error) {
|
||||||
|
this.logger.error(`Error scraping ${url}`, { error: marshalErrorLike(error) });
|
||||||
|
throw error;
|
||||||
} finally {
|
} finally {
|
||||||
|
if (page) {
|
||||||
(waitForPromise ? Promise.allSettled([gotoPromise, waitForPromise]) : gotoPromise).finally(() => {
|
(waitForPromise ? Promise.allSettled([gotoPromise, waitForPromise]) : gotoPromise).finally(() => {
|
||||||
page.off('snapshot', hdl);
|
page!.off('snapshot', hdl);
|
||||||
this.ditchPage(page);
|
this.ditchPage(page!);
|
||||||
});
|
});
|
||||||
|
}
|
||||||
nextSnapshotDeferred.resolve();
|
nextSnapshotDeferred.resolve();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user