fix: ignore match all target selectors for performance

This commit is contained in:
yanlong.wang 2024-07-31 14:06:22 +08:00
parent a08218506e
commit 8b7af6d076
No known key found for this signature in database
GPG Key ID: C0A623C0BADF9F37

View File

@ -237,6 +237,7 @@ export class CrawlerOptions extends AutoCastable {
instance.targetSelector ??= targetSelector;
const waitForSelector = ctx?.req.get('x-wait-for-selector')?.split(', ');
instance.waitForSelector ??= waitForSelector || instance.targetSelector;
instance.targetSelector = filterSelector(instance.targetSelector);
const overrideUserAgent = ctx?.req.get('x-user-agent');
instance.userAgent ??= overrideUserAgent;
@ -286,3 +287,20 @@ export class CrawlerOptionsHeaderOnly extends CrawlerOptions {
return instance;
}
}
/**
 * Removes selector entries that contain an overly broad part.
 *
 * Each entry is split on commas and every part is trimmed. An entry is
 * dropped when any of its parts starts with `*`, starts with `:`, or
 * contains `*:`. Entries are kept or dropped as a whole; they are never
 * rewritten.
 *
 * @param s - A single selector string, a list of selector strings, or nothing.
 * @returns `s` itself when it is falsy (`undefined` or `''`); otherwise a new
 *   array holding the surviving entries in their original order. A single
 *   string input is therefore returned wrapped in an array, and the result
 *   may be empty.
 */
function filterSelector(s?: string | string[]) {
    if (!s) {
        return s;
    }

    // True when one comma-separated part is on the reject list.
    const isTooBroad = (part: string): boolean =>
        part.startsWith('*') || part.startsWith(':') || part.includes('*:');

    const candidates = Array.isArray(s) ? s : [s];
    const accepted: string[] = [];

    for (const candidate of candidates) {
        // Plain comma split: commas nested inside parentheses are split too.
        const parts = candidate.split(',').map((piece) => piece.trim());
        if (!parts.some(isTooBroad)) {
            accepted.push(candidate);
        }
    }

    return accepted;
}